@@ -502,14 +502,6 @@ def extract_python(
502
502
:param options: a dictionary of additional options (optional)
503
503
:rtype: ``iterator``
504
504
"""
505
- funcname = lineno = message_lineno = None
506
- call_stack = - 1
507
- buf = []
508
- messages = []
509
- translator_comments = []
510
- in_def = in_translator_comments = False
511
- comment_tag = None
512
-
513
505
encoding = parse_encoding (fileobj ) or options .get ('encoding' , 'UTF-8' )
514
506
future_flags = parse_future_flags (fileobj , encoding )
515
507
next_line = lambda : fileobj .readline ().decode (encoding )
@@ -520,103 +512,145 @@ def extract_python(
520
512
# currently parsing one.
521
513
current_fstring_start = None
522
514
523
- for tok , value , (lineno , _ ), _ , _ in tokens :
524
- if call_stack == - 1 and tok == NAME and value in ('def' , 'class' ):
515
+ # Keep the stack of all function calls and their related contextual variables,
516
+ # so we can handle nested gettext calls.
517
+ function_stack = []
518
+ # Keep the last encountered function name for when we encounter
519
+ # an opening parenthesis
520
+ last_function_name = None
521
+ # Keep track of whether we're in a class or function definition
522
+ in_def = False
523
+ # Keep track of whether we're in a block of translator comments
524
+ in_translator_comments = False
525
+ # Keep track of the last encountered translator comments
526
+ translator_comments = []
527
+ # Keep track of the (split) strings encountered
528
+ message_buffer = []
529
+
530
+ for token , value , (line_no , _ ), _ , _ in tokens :
531
+ if not function_stack and token == NAME and value in ('def' , 'class' ):
532
+ # We're entering a class or function definition
525
533
in_def = True
526
- elif tok == OP and value == '(' :
527
- if in_def :
528
- # Avoid false positives for declarations such as:
529
- # def gettext(arg='message'):
530
- in_def = False
531
- continue
532
- if funcname :
533
- message_lineno = lineno
534
- call_stack += 1
535
- elif in_def and tok == OP and value == ':' :
536
- # End of a class definition without parens
534
+
535
+ elif in_def and token == OP and value in ('(' , ':' ):
536
+ # We're in a class or function definition and should not do anything
537
537
in_def = False
538
538
continue
539
- elif call_stack == - 1 and tok == COMMENT :
539
+
540
+ elif token == OP and value == '(' and last_function_name :
541
+ # We're entering a function call
542
+ cur_translator_comments = translator_comments
543
+ if function_stack and function_stack [- 1 ]['function_line_no' ] == line_no :
544
+ # If our current function call is on the same line as the previous one,
545
+ # copy their translator comments, since they also apply to us.
546
+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
547
+
548
+ # We add all information needed later for the current function call
549
+ function_stack .append ({
550
+ 'function_line_no' : line_no ,
551
+ 'function_name' : last_function_name ,
552
+ 'message_line_no' : None ,
553
+ 'messages' : [],
554
+ 'translator_comments' : cur_translator_comments ,
555
+ })
556
+ translator_comments = []
557
+
558
+ elif token == COMMENT :
540
559
# Strip the comment token from the line
541
560
value = value [1 :].strip ()
542
- if in_translator_comments and \
543
- translator_comments [- 1 ][0 ] == lineno - 1 :
561
+ if in_translator_comments and translator_comments [- 1 ][0 ] == line_no - 1 :
544
562
# We're already inside a translator comment, continue appending
545
- translator_comments .append ((lineno , value ))
563
+ translator_comments .append ((line_no , value ))
546
564
continue
547
- # If execution reaches this point, let's see if comment line
548
- # starts with one of the comment tags
565
+
549
566
for comment_tag in comment_tags :
550
567
if value .startswith (comment_tag ):
568
+ # Comment starts with one of the comment tags,
569
+ # so let's start capturing it
551
570
in_translator_comments = True
552
- translator_comments .append ((lineno , value ))
571
+ translator_comments .append ((line_no , value ))
553
572
break
554
- elif funcname and call_stack == 0 :
555
- nested = (tok == NAME and value in keywords )
556
- if (tok == OP and value == ')' ) or nested :
557
- if buf :
558
- messages .append ('' .join (buf ))
559
- del buf [:]
573
+
574
+ elif function_stack and function_stack [- 1 ]['function_name' ] in keywords :
575
+ # We're inside a translation function call
576
+ if token == OP and value == ')' :
577
+ # The call has ended, so we yield the translatable term(s)
578
+ messages = function_stack [- 1 ]['messages' ]
579
+ line_no = (
580
+ function_stack [- 1 ]['message_line_no' ]
581
+ or function_stack [- 1 ]['function_line_no' ]
582
+ )
583
+ cur_translator_comments = function_stack [- 1 ]['translator_comments' ]
584
+
585
+ if message_buffer :
586
+ messages .append ('' .join (message_buffer ))
587
+ message_buffer .clear ()
560
588
else :
561
589
messages .append (None )
562
590
563
591
messages = tuple (messages ) if len (messages ) > 1 else messages [0 ]
564
- # Comments don't apply unless they immediately
565
- # precede the message
566
- if translator_comments and \
567
- translator_comments [- 1 ][0 ] < message_lineno - 1 :
568
- translator_comments = []
569
-
570
- yield (message_lineno , funcname , messages ,
571
- [comment [1 ] for comment in translator_comments ])
572
-
573
- funcname = lineno = message_lineno = None
574
- call_stack = - 1
575
- messages = []
576
- translator_comments = []
577
- in_translator_comments = False
578
- if nested :
579
- funcname = value
580
- elif tok == STRING :
581
- val = _parse_python_string (value , encoding , future_flags )
582
- if val is not None :
583
- buf .append (val )
592
+ if (
593
+ cur_translator_comments
594
+ and cur_translator_comments [- 1 ][0 ] < line_no - 1
595
+ ):
596
+ # The translator comments are not immediately preceding the current
597
+ # term, so we skip them.
598
+ cur_translator_comments = []
599
+
600
+ yield (
601
+ line_no ,
602
+ function_stack [- 1 ]['function_name' ],
603
+ messages ,
604
+ [comment [1 ] for comment in cur_translator_comments ],
605
+ )
606
+
607
+ function_stack .pop ()
608
+
609
+ elif token == STRING :
610
+ # We've encountered a string inside a translation function call
611
+ string_value = _parse_python_string (value , encoding , future_flags )
612
+ if not function_stack [- 1 ]['message_line_no' ]:
613
+ function_stack [- 1 ]['message_line_no' ] = line_no
614
+ if string_value is not None :
615
+ message_buffer .append (string_value )
584
616
585
617
# Python 3.12+, see https://peps.python.org/pep-0701/#new-tokens
586
- elif tok == FSTRING_START :
618
+ elif token == FSTRING_START :
587
619
current_fstring_start = value
588
- elif tok == FSTRING_MIDDLE :
620
+ elif token == FSTRING_MIDDLE :
589
621
if current_fstring_start is not None :
590
622
current_fstring_start += value
591
- elif tok == FSTRING_END :
623
+ elif token == FSTRING_END :
592
624
if current_fstring_start is not None :
593
625
fstring = current_fstring_start + value
594
- val = _parse_python_string (fstring , encoding , future_flags )
595
- if val is not None :
596
- buf .append (val )
597
-
598
- elif tok == OP and value == ',' :
599
- if buf :
600
- messages .append ('' .join (buf ))
601
- del buf [:]
626
+ string_value = _parse_python_string (fstring , encoding , future_flags )
627
+ if string_value is not None :
628
+ message_buffer .append (string_value )
629
+
630
+ elif token == OP and value == ',' :
631
+ # End of a function call argument
632
+ if message_buffer :
633
+ function_stack [- 1 ]['messages' ].append ('' .join (message_buffer ))
634
+ message_buffer .clear ()
602
635
else :
603
- messages .append (None )
604
- if translator_comments :
605
- # We have translator comments, and since we're on a
606
- # comma(,) user is allowed to break into a new line
607
- # Let's increase the last comment's lineno in order
608
- # for the comment to still be a valid one
609
- old_lineno , old_comment = translator_comments . pop ()
610
- translator_comments . append (( old_lineno + 1 , old_comment ))
611
- elif call_stack > 0 and tok == OP and value == ')' :
612
- call_stack -= 1
613
- elif funcname and call_stack == - 1 :
614
- funcname = None
615
- elif tok == NAME and value in keywords :
616
- funcname = value
636
+ function_stack [ - 1 ][ ' messages' ] .append (None )
637
+
638
+ elif function_stack and token == OP and value == ')' :
639
+ function_stack . pop ()
640
+
641
+ if in_translator_comments and translator_comments [ - 1 ][ 0 ] < line_no :
642
+ # We have a newline in between the comments, so they don't belong
643
+ # together anymore
644
+ in_translator_comments = False
645
+
646
+ if token == NAME :
647
+ last_function_name = value
648
+ if function_stack and not function_stack [ - 1 ][ 'message_line_no' ] :
649
+ function_stack [ - 1 ][ 'message_line_no' ] = line_no
617
650
618
- if (current_fstring_start is not None
619
- and tok not in {FSTRING_START , FSTRING_MIDDLE }
651
+ if (
652
+ current_fstring_start is not None
653
+ and token not in {FSTRING_START , FSTRING_MIDDLE }
620
654
):
621
655
# In Python 3.12, tokens other than FSTRING_* mean the
622
656
# f-string is dynamic, so we don't want to extract it.
0 commit comments