@@ -721,54 +721,109 @@ def extract_javascript(
721
721
:param lineno: line number offset (for parsing embedded fragments)
722
722
"""
723
723
from babel .messages .jslexer import Token , tokenize , unquote_string
724
- funcname = message_lineno = None
725
- messages = []
726
- last_argument = None
727
- translator_comments = []
728
- concatenate_next = False
724
+
729
725
encoding = options .get ('encoding' , 'utf-8' )
730
- last_token = None
731
- call_stack = - 1
732
726
dotted = any ('.' in kw for kw in keywords )
727
+ last_token = None
728
+ # Keep the stack of all function calls and their related contextual variables,
729
+ # so we can handle nested gettext calls.
730
+ function_stack : list [FunctionStackItem ] = []
731
+ # Keep track of whether we're in a class or function definition
732
+ in_def = False
733
+ # Keep track of whether we're in a block of translator comments
734
+ in_translator_comments = False
735
+ # Keep track of the last encountered translator comments
736
+ translator_comments = []
737
+ # Keep track of the (split) strings encountered
738
+ message_buffer = []
739
+
733
740
for token in tokenize (
734
741
fileobj .read ().decode (encoding ),
735
742
jsx = options .get ("jsx" , True ),
736
743
template_string = options .get ("template_string" , True ),
737
744
dotted = dotted ,
738
745
lineno = lineno ,
739
746
):
740
- if ( # Turn keyword`foo` expressions into keyword("foo") calls:
741
- funcname and # have a keyword...
742
- (last_token and last_token .type == 'name' ) and # we've seen nothing after the keyword...
743
- token .type == 'template_string' # this is a template string
747
+ if token .type == 'name' and token .value in ('class' , 'function' ):
748
+ # We're entering a class or function definition
749
+ in_def = True
750
+
751
+ elif in_def and token .type == 'operator' and token .value in ('(' , '{' ):
752
+ # We're in a class or function definition and should not do anything
753
+ in_def = False
754
+ continue
755
+
756
+ elif (
757
+ last_token
758
+ and last_token .type == 'name'
759
+ and last_token .value in keywords
760
+ and token .type == 'template_string'
744
761
):
745
- message_lineno = token .lineno
746
- messages = [unquote_string (token .value )]
747
- call_stack = 0
762
+ # Turn keyword`foo` expressions into keyword("foo") function calls
763
+ string_value = unquote_string (token .value )
764
+ cur_translator_comments = translator_comments
765
+ if function_stack and function_stack [- 1 ].function_lineno == last_token .lineno :
766
+ # If our current function call is on the same line as the previous one,
767
+ # copy their translator comments, since they also apply to us.
768
+ cur_translator_comments = function_stack [- 1 ].translator_comments
769
+
770
+ # We add all information needed later for the current function call
771
+ function_stack .append (FunctionStackItem (
772
+ function_lineno = last_token .lineno ,
773
+ function_name = last_token .value ,
774
+ message_lineno = token .lineno ,
775
+ messages = [string_value ],
776
+ translator_comments = cur_translator_comments ,
777
+ ))
778
+ translator_comments = []
779
+
780
+ # We act as if we are closing the function call now
748
781
token = Token ('operator' , ')' , token .lineno )
749
782
750
- if options .get ('parse_template_string' ) and not funcname and token .type == 'template_string' :
783
+ if (
784
+ options .get ('parse_template_string' )
785
+ and (not last_token or last_token .type != 'name' or last_token .value not in keywords )
786
+ and token .type == 'template_string'
787
+ ):
751
788
yield from parse_template_string (token .value , keywords , comment_tags , options , token .lineno )
752
789
753
790
elif token .type == 'operator' and token .value == '(' :
754
- if funcname :
755
- message_lineno = token .lineno
756
- call_stack += 1
791
+ if last_token .type == 'name' :
792
+ # We're entering a function call
793
+ cur_translator_comments = translator_comments
794
+ if function_stack and function_stack [- 1 ].function_lineno == token .lineno :
795
+ # If our current function call is on the same line as the previous one,
796
+ # copy their translator comments, since they also apply to us.
797
+ cur_translator_comments = function_stack [- 1 ].translator_comments
798
+
799
+ # We add all information needed later for the current function call
800
+ function_stack .append (FunctionStackItem (
801
+ function_lineno = token .lineno ,
802
+ function_name = last_token .value ,
803
+ message_lineno = None ,
804
+ messages = [],
805
+ translator_comments = cur_translator_comments ,
806
+ ))
807
+ translator_comments = []
757
808
758
- elif call_stack == - 1 and token .type == 'linecomment' :
809
+ elif token .type == 'linecomment' :
810
+ # Strip the comment token from the line
759
811
value = token .value [2 :].strip ()
760
- if translator_comments and \
761
- translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
812
+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] == token . lineno - 1 :
813
+ # We're already inside a translator comment, continue appending
762
814
translator_comments .append ((token .lineno , value ))
763
815
continue
764
816
765
817
for comment_tag in comment_tags :
766
818
if value .startswith (comment_tag ):
767
- translator_comments .append ((token .lineno , value .strip ()))
819
+ # Comment starts with one of the comment tags,
820
+ # so let's start capturing it
821
+ in_translator_comments = True
822
+ translator_comments .append ((token .lineno , value ))
768
823
break
769
824
770
825
elif token .type == 'multilinecomment' :
771
- # only one multi-line comment may precede a translation
826
+ # Only one multi-line comment may precede a translation
772
827
translator_comments = []
773
828
value = token .value [2 :- 2 ].strip ()
774
829
for comment_tag in comment_tags :
@@ -778,68 +833,67 @@ def extract_javascript(
778
833
lines [0 ] = lines [0 ].strip ()
779
834
lines [1 :] = dedent ('\n ' .join (lines [1 :])).splitlines ()
780
835
for offset , line in enumerate (lines ):
781
- translator_comments .append ((token .lineno + offset ,
782
- line ))
836
+ translator_comments .append ((token .lineno + offset , line ))
783
837
break
784
838
785
- elif funcname and call_stack == 0 :
839
+ elif function_stack and function_stack [- 1 ].function_name in keywords :
840
+ # We're inside a translation function call
786
841
if token .type == 'operator' and token .value == ')' :
787
- if last_argument is not None :
788
- messages .append (last_argument )
789
- if len (messages ) > 1 :
790
- messages = tuple (messages )
791
- elif messages :
792
- messages = messages [0 ]
842
+ # The call has ended, so we yield the translatable term(s)
843
+ messages = function_stack [- 1 ].messages
844
+ lineno = (
845
+ function_stack [- 1 ].message_lineno
846
+ or function_stack [- 1 ].function_lineno
847
+ )
848
+ cur_translator_comments = function_stack [- 1 ].translator_comments
849
+
850
+ if message_buffer :
851
+ messages .append ('' .join (message_buffer ))
852
+ message_buffer .clear ()
793
853
else :
794
- messages = None
854
+ messages . append ( None )
795
855
796
- # Comments don't apply unless they immediately precede the
797
- # message
798
- if translator_comments and \
799
- translator_comments [- 1 ][0 ] < message_lineno - 1 :
800
- translator_comments = []
856
+ messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
857
+ if (
858
+ cur_translator_comments
859
+ and cur_translator_comments [- 1 ][0 ] < lineno - 1
860
+ ):
861
+ # The translator comments are not immediately preceding the current
862
+ # term, so we skip them.
863
+ cur_translator_comments = []
801
864
802
- if messages is not None :
803
- yield (message_lineno , funcname , messages ,
804
- [comment [1 ] for comment in translator_comments ])
865
+ yield (
866
+ lineno ,
867
+ function_stack [- 1 ].function_name ,
868
+ messages ,
869
+ [comment [1 ] for comment in cur_translator_comments ],
870
+ )
805
871
806
- funcname = message_lineno = last_argument = None
807
- concatenate_next = False
808
- translator_comments = []
809
- messages = []
810
- call_stack = - 1
872
+ function_stack .pop ()
811
873
812
874
elif token .type in ('string' , 'template_string' ):
813
- new_value = unquote_string (token .value )
814
- if concatenate_next :
815
- last_argument = (last_argument or '' ) + new_value
816
- concatenate_next = False
875
+ # We've encountered a string inside a translation function call
876
+ string_value = unquote_string (token .value )
877
+ if not function_stack [- 1 ].message_lineno :
878
+ function_stack [- 1 ].message_lineno = token .lineno
879
+ if string_value is not None :
880
+ message_buffer .append (string_value )
881
+
882
+ elif token .type == 'operator' and token .value == ',' :
883
+ # End of a function call argument
884
+ if message_buffer :
885
+ function_stack [- 1 ].messages .append ('' .join (message_buffer ))
886
+ message_buffer .clear ()
817
887
else :
818
- last_argument = new_value
819
-
820
- elif token .type == 'operator' :
821
- if token .value == ',' :
822
- if last_argument is not None :
823
- messages .append (last_argument )
824
- last_argument = None
825
- else :
826
- messages .append (None )
827
- concatenate_next = False
828
- elif token .value == '+' :
829
- concatenate_next = True
830
-
831
- elif call_stack > 0 and token .type == 'operator' \
832
- and token .value == ')' :
833
- call_stack -= 1
834
-
835
- elif funcname and call_stack == - 1 :
836
- funcname = None
837
-
838
- elif call_stack == - 1 and token .type == 'name' and \
839
- token .value in keywords and \
840
- (last_token is None or last_token .type != 'name' or
841
- last_token .value != 'function' ):
842
- funcname = token .value
888
+ function_stack [- 1 ].messages .append (None )
889
+
890
+ elif function_stack and token .type == 'operator' and token .value == ')' :
891
+ function_stack .pop ()
892
+
893
+ if in_translator_comments and translator_comments [- 1 ][0 ] < token .lineno :
894
+ # We have a newline in between the comments, so they don't belong
895
+ # together anymore
896
+ in_translator_comments = False
843
897
844
898
last_token = token
845
899
0 commit comments