Skip to content

Commit 50be29e

Browse files
committed
Allow extracting nested gettext calls in JavaScript
Before this change, the JavaScript extractor did not support nested gettext calls at all. The extraction code has been refactored to mirror the Python extractor as closely as possible and to support this use case.
1 parent 20464e1 commit 50be29e

File tree

2 files changed

+161
-75
lines changed

2 files changed

+161
-75
lines changed

babel/messages/extract.py

+129-75
Original file line numberDiff line numberDiff line change
@@ -721,54 +721,109 @@ def extract_javascript(
721721
:param lineno: line number offset (for parsing embedded fragments)
722722
"""
723723
from babel.messages.jslexer import Token, tokenize, unquote_string
724-
funcname = message_lineno = None
725-
messages = []
726-
last_argument = None
727-
translator_comments = []
728-
concatenate_next = False
724+
729725
encoding = options.get('encoding', 'utf-8')
730-
last_token = None
731-
call_stack = -1
732726
dotted = any('.' in kw for kw in keywords)
727+
last_token = None
728+
# Keep a stack of all function calls and their related contextual variables,
729+
# so we can handle nested gettext calls.
730+
function_stack: list[FunctionStackItem] = []
731+
# Keep track of whether we're in a class or function definition
732+
in_def = False
733+
# Keep track of whether we're in a block of translator comments
734+
in_translator_comments = False
735+
# Keep track of the last encountered translator comments
736+
translator_comments = []
737+
# Keep track of the (split) strings encountered
738+
message_buffer = []
739+
733740
for token in tokenize(
734741
fileobj.read().decode(encoding),
735742
jsx=options.get("jsx", True),
736743
template_string=options.get("template_string", True),
737744
dotted=dotted,
738745
lineno=lineno,
739746
):
740-
if ( # Turn keyword`foo` expressions into keyword("foo") calls:
741-
funcname and # have a keyword...
742-
(last_token and last_token.type == 'name') and # we've seen nothing after the keyword...
743-
token.type == 'template_string' # this is a template string
747+
if token.type == 'name' and token.value in ('class', 'function'):
748+
# We're entering a class or function definition
749+
in_def = True
750+
751+
elif in_def and token.type == 'operator' and token.value in ('(', '{'):
752+
# We're in a class or function definition and should not do anything
753+
in_def = False
754+
continue
755+
756+
elif (
757+
last_token
758+
and last_token.type == 'name'
759+
and last_token.value in keywords
760+
and token.type == 'template_string'
744761
):
745-
message_lineno = token.lineno
746-
messages = [unquote_string(token.value)]
747-
call_stack = 0
762+
# Turn keyword`foo` expressions into keyword("foo") function calls
763+
string_value = unquote_string(token.value)
764+
cur_translator_comments = translator_comments
765+
if function_stack and function_stack[-1].function_lineno == last_token.lineno:
766+
# If our current function call is on the same line as the previous one,
767+
# copy its translator comments, since they also apply to this call.
768+
cur_translator_comments = function_stack[-1].translator_comments
769+
770+
# We add all information needed later for the current function call
771+
function_stack.append(FunctionStackItem(
772+
function_lineno=last_token.lineno,
773+
function_name=last_token.value,
774+
message_lineno=token.lineno,
775+
messages=[string_value],
776+
translator_comments=cur_translator_comments,
777+
))
778+
translator_comments = []
779+
780+
# We act as if we are closing the function call now
748781
token = Token('operator', ')', token.lineno)
749782

750-
if options.get('parse_template_string') and not funcname and token.type == 'template_string':
783+
if (
784+
options.get('parse_template_string')
785+
and (not last_token or last_token.type != 'name' or last_token.value not in keywords)
786+
and token.type == 'template_string'
787+
):
751788
yield from parse_template_string(token.value, keywords, comment_tags, options, token.lineno)
752789

753790
elif token.type == 'operator' and token.value == '(':
754-
if funcname:
755-
message_lineno = token.lineno
756-
call_stack += 1
791+
if last_token.type == 'name':
792+
# We're entering a function call
793+
cur_translator_comments = translator_comments
794+
if function_stack and function_stack[-1].function_lineno == token.lineno:
795+
# If our current function call is on the same line as the previous one,
796+
# copy their translator comments, since they also apply to us.
797+
cur_translator_comments = function_stack[-1].translator_comments
798+
799+
# We add all information needed later for the current function call
800+
function_stack.append(FunctionStackItem(
801+
function_lineno=token.lineno,
802+
function_name=last_token.value,
803+
message_lineno=None,
804+
messages=[],
805+
translator_comments=cur_translator_comments,
806+
))
807+
translator_comments = []
757808

758-
elif call_stack == -1 and token.type == 'linecomment':
809+
elif token.type == 'linecomment':
810+
# Strip the comment token from the line
759811
value = token.value[2:].strip()
760-
if translator_comments and \
761-
translator_comments[-1][0] == token.lineno - 1:
812+
if in_translator_comments and translator_comments[-1][0] == token.lineno - 1:
813+
# We're already inside a translator comment, continue appending
762814
translator_comments.append((token.lineno, value))
763815
continue
764816

765817
for comment_tag in comment_tags:
766818
if value.startswith(comment_tag):
767-
translator_comments.append((token.lineno, value.strip()))
819+
# Comment starts with one of the comment tags,
820+
# so let's start capturing it
821+
in_translator_comments = True
822+
translator_comments.append((token.lineno, value))
768823
break
769824

770825
elif token.type == 'multilinecomment':
771-
# only one multi-line comment may precede a translation
826+
# Only one multi-line comment may precede a translation
772827
translator_comments = []
773828
value = token.value[2:-2].strip()
774829
for comment_tag in comment_tags:
@@ -778,68 +833,67 @@ def extract_javascript(
778833
lines[0] = lines[0].strip()
779834
lines[1:] = dedent('\n'.join(lines[1:])).splitlines()
780835
for offset, line in enumerate(lines):
781-
translator_comments.append((token.lineno + offset,
782-
line))
836+
translator_comments.append((token.lineno + offset, line))
783837
break
784838

785-
elif funcname and call_stack == 0:
839+
elif function_stack and function_stack[-1].function_name in keywords:
840+
# We're inside a translation function call
786841
if token.type == 'operator' and token.value == ')':
787-
if last_argument is not None:
788-
messages.append(last_argument)
789-
if len(messages) > 1:
790-
messages = tuple(messages)
791-
elif messages:
792-
messages = messages[0]
842+
# The call has ended, so we yield the translatable term(s)
843+
messages = function_stack[-1].messages
844+
lineno = (
845+
function_stack[-1].message_lineno
846+
or function_stack[-1].function_lineno
847+
)
848+
cur_translator_comments = function_stack[-1].translator_comments
849+
850+
if message_buffer:
851+
messages.append(''.join(message_buffer))
852+
message_buffer.clear()
793853
else:
794-
messages = None
854+
messages.append(None)
795855

796-
# Comments don't apply unless they immediately precede the
797-
# message
798-
if translator_comments and \
799-
translator_comments[-1][0] < message_lineno - 1:
800-
translator_comments = []
856+
messages = tuple(messages) if len(messages) > 1 else messages[0]
857+
if (
858+
cur_translator_comments
859+
and cur_translator_comments[-1][0] < lineno - 1
860+
):
861+
# The translator comments are not immediately preceding the current
862+
# term, so we skip them.
863+
cur_translator_comments = []
801864

802-
if messages is not None:
803-
yield (message_lineno, funcname, messages,
804-
[comment[1] for comment in translator_comments])
865+
yield (
866+
lineno,
867+
function_stack[-1].function_name,
868+
messages,
869+
[comment[1] for comment in cur_translator_comments],
870+
)
805871

806-
funcname = message_lineno = last_argument = None
807-
concatenate_next = False
808-
translator_comments = []
809-
messages = []
810-
call_stack = -1
872+
function_stack.pop()
811873

812874
elif token.type in ('string', 'template_string'):
813-
new_value = unquote_string(token.value)
814-
if concatenate_next:
815-
last_argument = (last_argument or '') + new_value
816-
concatenate_next = False
875+
# We've encountered a string inside a translation function call
876+
string_value = unquote_string(token.value)
877+
if not function_stack[-1].message_lineno:
878+
function_stack[-1].message_lineno = token.lineno
879+
if string_value is not None:
880+
message_buffer.append(string_value)
881+
882+
elif token.type == 'operator' and token.value == ',':
883+
# End of a function call argument
884+
if message_buffer:
885+
function_stack[-1].messages.append(''.join(message_buffer))
886+
message_buffer.clear()
817887
else:
818-
last_argument = new_value
819-
820-
elif token.type == 'operator':
821-
if token.value == ',':
822-
if last_argument is not None:
823-
messages.append(last_argument)
824-
last_argument = None
825-
else:
826-
messages.append(None)
827-
concatenate_next = False
828-
elif token.value == '+':
829-
concatenate_next = True
830-
831-
elif call_stack > 0 and token.type == 'operator' \
832-
and token.value == ')':
833-
call_stack -= 1
834-
835-
elif funcname and call_stack == -1:
836-
funcname = None
837-
838-
elif call_stack == -1 and token.type == 'name' and \
839-
token.value in keywords and \
840-
(last_token is None or last_token.type != 'name' or
841-
last_token.value != 'function'):
842-
funcname = token.value
888+
function_stack[-1].messages.append(None)
889+
890+
elif function_stack and token.type == 'operator' and token.value == ')':
891+
function_stack.pop()
892+
893+
if in_translator_comments and translator_comments[-1][0] < token.lineno:
894+
# We have a newline in between the comments, so they don't belong
895+
# together anymore
896+
in_translator_comments = False
843897

844898
last_token = token
845899

tests/messages/test_js_extract.py

+32
Original file line numberDiff line numberDiff line change
@@ -191,3 +191,35 @@ def test_inside_nested_template_string():
191191
)
192192

193193
assert messages == [(1, 'Greetings!', [], None), (1, 'This is a lovely evening.', [], None), (1, 'The day is really nice!', [], None)]
194+
195+
def test_nested_gettext_calls():
196+
buf = BytesIO(b"""\
197+
gettext("Hello %s", gettext("User"));
198+
// NOTE: Double
199+
gettext("Hello %(user)s", { user: gettext("User") });
200+
// NOTE: Non Preceding
201+
202+
gettext("Hello %s", dummy.dummyFunction(gettext("User")));
203+
gettext(
204+
"Hello %(user)s",
205+
{ user: dummy.dummyFunction(gettext("User")) },
206+
);
207+
// NOTE: Another Comment
208+
gettext("Random %s", gettext`Shizzle`);
209+
""")
210+
messages = list(
211+
extract.extract('javascript', buf, {"gettext": None}, ['NOTE:'], {}),
212+
)
213+
214+
assert messages == [
215+
(1, 'User', [], None),
216+
(1, 'Hello %s', [], None),
217+
(3, 'User', ['NOTE: Double'], None),
218+
(3, 'Hello %(user)s', ['NOTE: Double'], None),
219+
(6, 'User', [], None),
220+
(6, 'Hello %s', [], None),
221+
(9, 'User', [], None),
222+
(8, 'Hello %(user)s', [], None),
223+
(12, 'Shizzle', ['NOTE: Another Comment'], None),
224+
(12, 'Random %s', ['NOTE: Another Comment'], None),
225+
]

0 commit comments

Comments
 (0)