Diffstat (limited to 'jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py')
-rw-r--r--  jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py  |  185
1 file changed, 185 insertions(+), 0 deletions(-)
diff --git a/jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py b/jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py
new file mode 100644
index 0000000..aed39f3
--- /dev/null
+++ b/jython-tosca-parser/src/main/resources/Lib/site-packages/babel-2.3.4-py2.7.egg/babel/messages/jslexer.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+"""
+ babel.messages.jslexer
+ ~~~~~~~~~~~~~~~~~~~~~~
+
+ A simple JavaScript 1.5 lexer which is used for the JavaScript
+ extractor.
+
+ :copyright: (c) 2013 by the Babel Team.
+ :license: BSD, see LICENSE for more details.
+"""
+from collections import namedtuple
+import re
+from babel._compat import unichr
+
+operators = sorted([
+ '+', '-', '*', '%', '!=', '==', '<', '>', '<=', '>=', '=',
+ '+=', '-=', '*=', '%=', '<<', '>>', '>>>', '<<=', '>>=',
+ '>>>=', '&', '&=', '|', '|=', '&&', '||', '^', '^=', '(', ')',
+ '[', ']', '{', '}', '!', '--', '++', '~', ',', ';', '.', ':'
+], key=len, reverse=True)
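+# Sorting longest-first makes the alternation in the 'operator' rule
+# below prefer the longest possible match, e.g. '>>>=' over '>>'.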
+
+escapes = {'b': '\b', 'f': '\f', 'n': '\n', 'r': '\r', 't': '\t'}
+
+name_re = re.compile(r'[\w$_][\w\d$_]*', re.UNICODE)
+dotted_name_re = re.compile(r'[\w$_][\w\d$_.]*[\w\d$_.]', re.UNICODE)
+division_re = re.compile(r'/=?')
+regex_re = re.compile(r'/(?:[^/\\]*(?:\\.[^/\\]*)*)/[a-zA-Z]*', re.DOTALL)
+line_re = re.compile(r'(\r\n|\n|\r)')
+line_join_re = re.compile(r'\\' + line_re.pattern)
+uni_escape_re = re.compile(r'[a-fA-F0-9]{1,4}')
+
+Token = namedtuple('Token', 'type value lineno')
+
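+# Note: the order of the rules below matters. 'dotted_name' is tried
+# before 'name' so that a dotted reference like foo.bar is consumed as
+# one token instead of stopping at foo.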
+_rules = [
+    (None, re.compile(r'\s+', re.UNICODE)),
+ (None, re.compile(r'<!--.*')),
+ ('linecomment', re.compile(r'//.*')),
+    ('multilinecomment', re.compile(r'/\*.*?\*/', re.UNICODE | re.DOTALL)),
+ ('dotted_name', dotted_name_re),
+ ('name', name_re),
+ ('number', re.compile(r'''(?x)(
+ (?:0|[1-9]\d*)
+ (\.\d+)?
+ ([eE][-+]?\d+)? |
+ (0x[a-fA-F0-9]+)
+ )''')),
+    ('jsx_tag', re.compile(r'<(?:/?)\w+.+?>', re.I)),  # dropped in get_rules() when JSX is disabled
+ ('operator', re.compile(r'(%s)' % '|'.join(map(re.escape, operators)))),
+ ('template_string', re.compile(r'''`(?:[^`\\]*(?:\\.[^`\\]*)*)`''', re.UNICODE)),
+ ('string', re.compile(r'''(?xs)(
+ '(?:[^'\\]*(?:\\.[^'\\]*)*)' |
+ "(?:[^"\\]*(?:\\.[^"\\]*)*)"
+ )'''))
+]
+
+
+def get_rules(jsx, dotted, template_string):
+ """
+ Get a tokenization rule list given the passed syntax options.
+
+ Internal to this module.
+ """
+ rules = []
+ for token_type, rule in _rules:
+ if not jsx and token_type and 'jsx' in token_type:
+ continue
+ if not template_string and token_type == 'template_string':
+ continue
+ if token_type == 'dotted_name':
+ if not dotted:
+ continue
+ token_type = 'name'
+ rules.append((token_type, rule))
+ return rules
+
+
+def indicates_division(token):
+ """A helper function that helps the tokenizer to decide if the current
+ token may be followed by a division operator.
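+
+    For example (illustrative)::
+
+        (a + b) / c    # after ')' the slash is division
+        x = /foo/g     # after '=' it starts a regexp literal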
+ """
+ if token.type == 'operator':
+ return token.value in (')', ']', '}', '++', '--')
+ return token.type in ('name', 'number', 'string', 'regexp')
+
+
+def unquote_string(string):
+ """Unquote a string with JavaScript rules. The string has to start with
+ string delimiters (``'``, ``"`` or the back-tick/grave accent (for template strings).)
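+
+    A minimal illustrative call (hypothetical, not from the upstream
+    test suite)::
+
+        unquote_string(u"'Hello'")   # -> u'Hello'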
+ """
+ assert string and string[0] == string[-1] and string[0] in '"\'`', \
+ 'string provided is not properly delimited'
+ string = line_join_re.sub('\\1', string[1:-1])
+ result = []
+ add = result.append
+ pos = 0
+
+ while 1:
+ # scan for the next escape
+ escape_pos = string.find('\\', pos)
+ if escape_pos < 0:
+ break
+ add(string[pos:escape_pos])
+
+ # check which character is escaped
+ next_char = string[escape_pos + 1]
+ if next_char in escapes:
+ add(escapes[next_char])
+
+        # unicode escapes. try to consume up to four hexadecimal
+        # characters and interpret them as a unicode code point. If there
+        # is no such code point, put all of the consumed characters into
+        # the string as-is.
+ elif next_char in 'uU':
+ escaped = uni_escape_re.match(string, escape_pos + 2)
+ if escaped is not None:
+ escaped_value = escaped.group()
+ if len(escaped_value) == 4:
+ try:
+ add(unichr(int(escaped_value, 16)))
+ except ValueError:
+ pass
+ else:
+ pos = escape_pos + 6
+ continue
+ add(next_char + escaped_value)
+ pos = escaped.end()
+ continue
+ else:
+ add(next_char)
+
+ # bogus escape. Just remove the backslash.
+ else:
+ add(next_char)
+ pos = escape_pos + 2
+
+ if pos < len(string):
+ add(string[pos:])
+
+ return u''.join(result)
+
+
+def tokenize(source, jsx=True, dotted=True, template_string=True):
+ """
+ Tokenize JavaScript/JSX source. Returns a generator of tokens.
+
+ :param jsx: Enable (limited) JSX parsing.
+    :param dotted: Read dotted names as a single name token.
+    :param template_string: Support ES6 template strings.
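+
+    An illustrative token stream (hypothetical input, values per the
+    rules above)::
+
+        tokenize(u"gettext('Hello')")
+        # -> Token('name', u'gettext', 1), Token('operator', u'(', 1),
+        #    Token('string', u"'Hello'", 1), Token('operator', u')', 1)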
+ """
+ may_divide = False
+ pos = 0
+ lineno = 1
+ end = len(source)
+ rules = get_rules(jsx=jsx, dotted=dotted, template_string=template_string)
+
+ while pos < end:
+ # handle regular rules first
+ for token_type, rule in rules:
+ match = rule.match(source, pos)
+ if match is not None:
+ break
+        # if we don't have a match we don't give up yet, but check for
+        # division operators or regular expression literals, based on
+        # the status of `may_divide`, which is determined by the last
+        # processed non-whitespace token using `indicates_division`.
+ else:
+ if may_divide:
+ match = division_re.match(source, pos)
+ token_type = 'operator'
+ else:
+ match = regex_re.match(source, pos)
+ token_type = 'regexp'
+ if match is None:
+                # whoops, invalid syntax. Jump one char ahead and try again.
+ pos += 1
+ continue
+
+ token_value = match.group()
+ if token_type is not None:
+ token = Token(token_type, token_value, lineno)
+ may_divide = indicates_division(token)
+ yield token
+ lineno += len(line_re.findall(token_value))
+ pos = match.end()
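
For context, here is a short driver sketch (hypothetical code, not part of the diff above) showing how the module's two public helpers are typically combined: walk the token stream and unquote every string literal, which is roughly how Babel's JavaScript message extractor uses this lexer.

    # Hypothetical driver: collect all string literals from a JS snippet.
    from babel.messages.jslexer import tokenize, unquote_string

    source = u"var msg = gettext('Hello, world!');"
    strings = [unquote_string(tok.value)
               for tok in tokenize(source)
               if tok.type == 'string']
    # strings == [u'Hello, world!']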