sillsdev
diff --git a/machine/corpora/__init__.py b/machine/corpora/__init__.py
Lines changed: 9 additions & 2 deletions
diff --git a/machine/corpora/paratext_project_text_updater_base.py b/machine/corpora/paratext_project_text_updater_base.py
Lines changed: 4 additions & 4 deletions
diff --git a/machine/corpora/scripture_embed.py b/machine/corpora/scripture_embed.py
Lines changed: 0 additions & 16 deletions
diff --git a/machine/corpora/scripture_ref_usfm_parser_handler.py b/machine/corpora/scripture_ref_usfm_parser_handler.py
Lines changed: 40 additions & 76 deletions
diff --git a/machine/corpora/scripture_update_block.py b/machine/corpora/scripture_update_block.py
Lines changed: 0 additions & 53 deletions
diff --git a/machine/corpora/scripture_update_block_handler.py b/machine/corpora/scripture_update_block_handler.py
Lines changed: 0 additions & 11 deletions
diff --git a/machine/corpora/scripture_update_element.py b/machine/corpora/scripture_update_element.py
Lines changed: 0 additions & 44 deletions
@@ -61,6 +61,9 @@
 from .usfm_tag import UsfmJustification, UsfmStyleAttribute, UsfmStyleType, UsfmTag, UsfmTextProperties, UsfmTextType
 from .usfm_token import UsfmAttribute, UsfmToken, UsfmTokenType
 from .usfm_tokenizer import RtlReferenceOrder, UsfmTokenizer
+from .usfm_update_block import UsfmUpdateBlock
+from .usfm_update_block_element import UsfmUpdateBlockElement, UsfmUpdateBlockElementType
+from .usfm_update_block_handler import UsfmUpdateBlockHandler
 from .usx_file_alignment_collection import UsxFileAlignmentCollection
 from .usx_file_alignment_corpus import UsxFileAlignmentCorpus
 from .usx_file_text import UsxFileText
@@ -92,8 +95,8 @@
     "is_scripture",
     "lowercase",
     "MemoryAlignmentCollection",
-    "MemoryText",
     "MemoryStreamContainer",
+    "MemoryText",
     "MultiKeyRef",
     "nfc_normalize",
     "nfd_normalize",
@@ -126,9 +129,9 @@
     "TextRow",
     "TextRowFlags",
     "unescape_spaces",
-    "UpdateUsfmTextBehavior",
     "UpdateUsfmMarkerBehavior",
     "UpdateUsfmParserHandler",
+    "UpdateUsfmTextBehavior",
     "UsfmAttribute",
     "UsfmElementType",
     "UsfmFileText",
@@ -148,6 +151,10 @@
     "UsfmToken",
     "UsfmTokenizer",
     "UsfmTokenType",
+    "UsfmUpdateBlock",
+    "UsfmUpdateBlockElement",
+    "UsfmUpdateBlockElementType",
+    "UsfmUpdateBlockHandler",
     "UsxFileAlignmentCollection",
     "UsxFileAlignmentCorpus",
     "UsxFileText",
 
@@ -1,13 +1,13 @@
 from abc import ABC, abstractmethod
-from typing import BinaryIO, Optional, Sequence, Tuple, Union
+from typing import BinaryIO, Iterable, Optional, Sequence, Tuple, Union
 
 from ..utils.typeshed import StrPath
 from .paratext_project_settings import ParatextProjectSettings
 from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
 from .scripture_ref import ScriptureRef
-from .scripture_update_block_handler import ScriptureUpdateBlockHandler
 from .update_usfm_parser_handler import UpdateUsfmMarkerBehavior, UpdateUsfmParserHandler, UpdateUsfmTextBehavior
 from .usfm_parser import parse_usfm
+from .usfm_update_block_handler import UsfmUpdateBlockHandler
 
 
 class ParatextProjectTextUpdaterBase(ABC):
@@ -26,8 +26,8 @@ def update_usfm(
         paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
         embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
         style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP,
-        preserve_paragraph_styles: Optional[Sequence[str]] = None,
-        update_block_handlers: Optional[list[ScriptureUpdateBlockHandler]] = None,
+        preserve_paragraph_styles: Optional[Union[Iterable[str], str]] = None,
+        update_block_handlers: Optional[Iterable[UsfmUpdateBlockHandler]] = None,
     ) -> Optional[str]:
         file_name: str = self._settings.get_book_file_name(book_id)
         if not self._exists(file_name):
 
@@ -5,7 +5,6 @@
 from ..scripture.verse_ref import VerseRef, are_overlapping_verse_ranges
 from .corpora_utils import merge_verse_ranges
 from .scripture_element import ScriptureElement
-from .scripture_embed import EMBED_PART_START_CHAR_STYLES, is_embed_part_style, is_embed_style, is_note_text
 from .scripture_ref import ScriptureRef
 from .usfm_parser_handler import UsfmParserHandler
 from .usfm_parser_state import UsfmParserState
@@ -16,7 +15,14 @@ class ScriptureTextType(Enum):
     NONE = auto()
     NONVERSE = auto()
     VERSE = auto()
-    NOTE_TEXT = auto()
+    EMBED = auto()
+
+
+_EMBED_STYLES = {"f", "fe", "x", "fig"}
+
+
+def _is_embed_style(marker: Optional[str]) -> bool:
+    return marker is not None and (marker.strip("*") in _EMBED_STYLES or marker.startswith("z"))
 
 
 class ScriptureRefUsfmParserHandler(UsfmParserHandler, ABC):
@@ -25,18 +31,11 @@ def __init__(self) -> None:
         self._cur_elements_stack: List[ScriptureElement] = []
         self._cur_text_type_stack: List[ScriptureTextType] = []
         self._duplicate_verse: bool = False
-        self._in_preserved_paragraph: bool = False
-        self._in_embed: bool = False
-        self._in_note_text: bool = False
-        self._in_nested_embed: bool = False
 
     @property
     def _current_text_type(self) -> ScriptureTextType:
         return ScriptureTextType.NONE if len(self._cur_text_type_stack) == 0 else self._cur_text_type_stack[-1]
 
-    def _is_in_note_text(self) -> bool:
-        return self._in_note_text
-
     def end_usfm(self, state: UsfmParserState) -> None:
         self._end_verse_text_wrapper(state)
 
@@ -112,32 +111,6 @@ def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N
     def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None:
         self._end_parent_element()
 
-    def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None:
-        self._in_embed = True
-        self._start_embed_wrapper(state, marker)
-
-    def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
-        self._end_note_text_wrapper(state)
-        self._end_embed(state, marker, None, closed)
-        self._in_embed = False
-
-    def _start_embed_wrapper(self, state: UsfmParserState, marker: str) -> None:
-        if self._cur_verse_ref.is_default:
-            self._update_verse_ref(state.verse_ref, marker)
-
-        if not self._duplicate_verse:
-            self._check_convert_verse_para_to_non_verse(state)
-            self._next_element(marker)
-
-        self._start_embed(state, self._create_non_verse_ref())
-
-    def _start_embed(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ...
-
-    def _end_embed(
-        self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
-    ) -> None:
-        pass
-
     def text(self, state: UsfmParserState, text: str) -> None:
         # if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment
         if text.strip():
@@ -149,29 +122,23 @@ def opt_break(self, state: UsfmParserState) -> None:
     def start_char(
         self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]]
     ) -> None:
-        if is_embed_part_style(marker) and self._in_note_text:
-            self._in_nested_embed = True
         # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment
         self._check_convert_verse_para_to_non_verse(state)
 
-        if is_embed_style(marker):
-            self._in_embed = True
-            self._start_embed_wrapper(state, marker)
-
-        if is_note_text(marker):
-            self._start_note_text_wrapper(state)
+        if _is_embed_style(marker):
+            self._start_embed_text_wrapper(state, marker)
 
     def end_char(
         self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
     ) -> None:
-        if is_embed_part_style(marker):
-            if self._in_nested_embed:
-                self._in_nested_embed = False
-            elif self._is_note_text(marker):
-                self._end_note_text_wrapper(state)
-        if is_embed_style(marker):
-            self._end_embed(state, marker, attributes, closed)
-            self._in_embed = False
+        if _is_embed_style(marker):
+            self._end_embed_text_wrapper(state)
+
+    def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None:
+        self._start_embed_text_wrapper(state, marker)  # notes share the embed-text path used by embed char styles
+
+    def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
+        self._end_embed_text_wrapper(state)  # mirrors start_note: closes the embed text, if one is open
 
     def _start_verse_text(self, state: UsfmParserState, scripture_refs: Optional[Sequence[ScriptureRef]]) -> None: ...
 
@@ -181,20 +148,9 @@ def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: Scripture
 
     def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ...
 
-    def _start_note_text_wrapper(self, state: UsfmParserState):
-        self._in_note_text = True
-        self._cur_text_type_stack.append(ScriptureTextType.NOTE_TEXT)
-        self._start_note_text(state)
-
-    def _start_note_text(self, state: UsfmParserState) -> None: ...
-
-    def _end_note_text_wrapper(self, state: UsfmParserState):
-        if self._cur_text_type_stack and self._cur_text_type_stack[-1] == ScriptureTextType.NOTE_TEXT:
-            self._end_note_text(state, self._create_non_verse_ref())
-            self._cur_text_type_stack.pop()
-            self._in_note_text = False
+    def _start_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ...
 
-    def _end_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ...
+    def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ...
 
     def _start_verse_text_wrapper(self, state: UsfmParserState) -> None:
         self._duplicate_verse = False
@@ -222,6 +178,25 @@ def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None:
             self._cur_elements_stack.append(ScriptureElement(0, marker))
         self._cur_verse_ref = verse_ref.copy()
 
+    def _start_embed_text_wrapper(self, state: UsfmParserState, marker: str) -> None:
+        if self._cur_verse_ref.is_default:
+            self._update_verse_ref(state.verse_ref, marker)
+
+        if not self._duplicate_verse:
+            self._check_convert_verse_para_to_non_verse(state)
+            self._next_element(marker)
+            self._cur_text_type_stack.append(ScriptureTextType.EMBED)
+            self._start_embed_text(state, self._create_non_verse_ref())
+
+    def _end_embed_text_wrapper(self, state: UsfmParserState) -> None:
+        if (
+            not self._duplicate_verse
+            and self._cur_text_type_stack
+            and self._cur_text_type_stack[-1] == ScriptureTextType.EMBED
+        ):
+            self._end_embed_text(state, self._create_non_verse_ref())
+            self._cur_text_type_stack.pop()
+
     def _next_element(self, marker: str) -> None:
         prev_elem: ScriptureElement = self._cur_elements_stack.pop()
         self._cur_elements_stack.append(ScriptureElement(prev_elem.position + 1, marker))
@@ -234,7 +209,7 @@ def _end_parent_element(self) -> None:
         self._cur_elements_stack.pop()
 
     def _end_embed_elements(self) -> None:
-        if self._cur_elements_stack and is_embed_style(self._cur_elements_stack[-1].name):
+        if self._cur_elements_stack and _is_embed_style(self._cur_elements_stack[-1].name):
             self._cur_elements_stack.pop()
 
     def _create_verse_refs(self) -> List[ScriptureRef]:
@@ -263,14 +238,3 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
         ):
             self._start_parent_element(para_tag.marker)
             self._start_non_verse_text_wrapper(state)
-
-    def _is_in_embed(self, marker: Optional[str]) -> bool:
-        return self._in_embed or is_embed_style(marker)
-
-    def _is_in_nested_embed(self, marker: Optional[str]) -> bool:
-        return self._in_nested_embed or (
-            marker is not None
-            and marker.startswith("+")
-            and marker[1] in EMBED_PART_START_CHAR_STYLES
-            and marker != "fm"
-        )