
Commit 5a29aec

Javascript: Support dotted names as keywords
Fixes #148
1 parent: 3aa3f29

File tree

  babel/messages/extract.py
  babel/messages/jslexer.py
  tests/messages/test_extract.py
  tests/messages/test_jslexer.py

4 files changed, +69 -3 lines changed

babel/messages/extract.py

+9 -3
@@ -462,7 +462,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
                          in the results
     :param options: a dictionary of additional options (optional)
     """
-    from babel.messages.jslexer import tokenize, unquote_string
+    from babel.messages.jslexer import tokenize, tokenize_dotted, unquote_string
     funcname = message_lineno = None
     messages = []
     last_argument = None
@@ -472,7 +472,13 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
     last_token = None
     call_stack = -1

-    for token in tokenize(fileobj.read().decode(encoding)):
+    tokenizer = (  # Don't bother using the more complex dotted logic if we don't need it
+        tokenize_dotted
+        if any('.' in kw for kw in keywords)
+        else tokenize
+    )
+
+    for token in tokenizer(fileobj.read().decode(encoding)):
         if token.type == 'operator' and token.value == '(':
             if funcname:
                 message_lineno = token.lineno
@@ -558,7 +564,7 @@ def extract_javascript(fileobj, keywords, comment_tags, options):
         elif funcname and call_stack == -1:
             funcname = None

-        elif call_stack == -1 and token.type == 'name' and \
+        elif call_stack == -1 and token.type in ('name', 'dotted_name') and \
                 token.value in keywords and \
                 (last_token is None or last_token.type != 'name' or
                  last_token.value != 'function'):

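The only change on the extraction side is which lexer gets used. Below is a minimal sketch of that selection check in isolation; the helper name needs_dotted_lexer is illustrative and not part of Babel's API, and keywords is assumed to be the usual iterable of keyword names:

# Illustrative helper, not part of Babel: it mirrors the any('.' in kw ...)
# check added above, so the dotted lexer's extra buffering is only paid for
# when at least one extraction keyword actually contains a dot.
def needs_dotted_lexer(keywords):
    return any('.' in kw for kw in keywords)

print(needs_dotted_lexer(['_', 'gettext', 'ngettext']))               # False
print(needs_dotted_lexer(['_', 'com.corporate.i18n.formatMessage']))  # True
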
babel/messages/jslexer.py

+37
@@ -164,3 +164,40 @@ def tokenize(source):
             yield token
         lineno += len(line_re.findall(token_value))
         pos = match.end()
+
+
+def tokenize_dotted(source):
+    """
+    Tokenize JavaScript source, regrouping dotted names into dotted_name tokens.
+
+    Returns a generator of tokens.
+
+    :return: Iterable[tuple]
+    """
+    DOT_OP = ("operator", ".")
+    dotted_buf = []
+
+    def release_buf(dotted_buf):
+        if not dotted_buf:
+            return
+        if len(dotted_buf) > 1:  # Have something to join?
+            dotted_name = "".join(buftok[1] for buftok in dotted_buf)
+            yield Token("dotted_name", dotted_name, dotted_buf[0][2])
+        else:  # Otherwise just release the single token as it was
+            yield dotted_buf[0]
+        dotted_buf[:] = []
+
+    for tok in tokenize(source):
+        if tok[0] == "name" and (not dotted_buf or dotted_buf[-1][:2] == DOT_OP):
+            dotted_buf.append(tok)
+            continue
+        if tok[:2] == DOT_OP and (dotted_buf and dotted_buf[-1][0] == "name"):
+            dotted_buf.append(tok)
+            continue
+        if dotted_buf:  # Release captured tokens when reaching a noncapturable token
+            for buftok in release_buf(dotted_buf):
+                yield buftok
+        yield tok
+
+    for buftok in release_buf(dotted_buf):  # And release when reaching the end
+        yield buftok

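The regrouping technique is small enough to show in isolation. The sketch below reimplements the same buffering idea over plain (type, value, lineno) tuples, with no dependency on Babel's internal Token type; the function name and the hand-written input token stream are made up for illustration:

def group_dotted(tokens):
    """Merge runs of name / '.' / name tokens into single dotted_name tokens."""
    buf = []

    def flush():
        if not buf:
            return
        if len(buf) > 1:  # several pieces buffered: join them into one token
            yield ("dotted_name", "".join(t[1] for t in buf), buf[0][2])
        else:             # a lone name: pass it through untouched
            yield buf[0]
        del buf[:]

    for tok in tokens:
        if tok[0] == "name" and (not buf or buf[-1][:2] == ("operator", ".")):
            buf.append(tok)         # start of a run, or a name after a dot
        elif tok[:2] == ("operator", ".") and buf and buf[-1][0] == "name":
            buf.append(tok)         # a dot following a buffered name
        else:
            for out in flush():     # anything else ends the dotted run
                yield out
            yield tok
    for out in flush():             # flush a run that ends at end of input
        yield out


tokens = [
    ("name", "foo", 1), ("operator", ".", 1), ("name", "bar", 1),
    ("operator", "(", 1), ("name", "quux", 1), ("operator", ")", 1),
]
print(list(group_dotted(tokens)))
# [('dotted_name', 'foo.bar', 1), ('operator', '(', 1),
#  ('name', 'quux', 1), ('operator', ')', 1)]
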
tests/messages/test_extract.py

+8
@@ -478,6 +478,14 @@ def test_misplaced_comments(self):
         self.assertEqual(u'no comment here', messages[2][2])
         self.assertEqual([], messages[2][3])

+    def test_dotted_keyword_extract(self):
+        buf = BytesIO(b"msg1 = com.corporate.i18n.formatMessage('Insert coin to continue')")
+        messages = list(
+            extract.extract('javascript', buf, {"com.corporate.i18n.formatMessage": None}, [], {})
+        )
+
+        assert messages == [(1, 'Insert coin to continue', [], None)]
+


 class ExtractTestCase(unittest.TestCase):


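The same call the new test makes can be run by hand against a checkout that includes this commit; the JavaScript snippet and the dotted keyword below are just the test's own example data:

from io import BytesIO
from babel.messages import extract

buf = BytesIO(b"msg1 = com.corporate.i18n.formatMessage('Insert coin to continue')")
for lineno, message, comments, context in extract.extract(
        'javascript', buf, {'com.corporate.i18n.formatMessage': None}, [], {}):
    print(lineno, message, comments, context)
# Per the test above, this should print: 1 Insert coin to continue [] None
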
tests/messages/test_jslexer.py

+15
@@ -6,3 +6,18 @@
 def test_unquote():
     assert jslexer.unquote_string('""') == ''
     assert jslexer.unquote_string(r'"h\u00ebllo"') == u"hëllo"
+
+
+def test_dotted_name():
+    assert list(jslexer.tokenize_dotted("foo.bar(quux)")) == [
+        ('dotted_name', 'foo.bar', 1),
+        ('operator', '(', 1),
+        ('name', 'quux', 1),
+        ('operator', ')', 1)
+    ]
+
+
+def test_dotted_name_end():
+    assert list(jslexer.tokenize_dotted("foo.bar")) == [
+        ('dotted_name', 'foo.bar', 1),
+    ]

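As a quick sanity check of what these lexer-level tests assert, here is a hedged comparison of the two lexers on the same input (again assuming a checkout with this commit installed); the plain lexer keeps the name and dot tokens separate, while tokenize_dotted merges them:

from babel.messages import jslexer

src = "foo.bar(quux)"
# Plain lexer: dotted access arrives as separate name / '.' / name tokens.
print([tuple(t) for t in jslexer.tokenize(src)])
# Dotted lexer: the run is merged into one token, as asserted in test_dotted_name above.
print([tuple(t) for t in jslexer.tokenize_dotted(src)])
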