From 157ec482761d2f12b384db236d5716c374a294eb Mon Sep 17 00:00:00 2001 From: John Lambert Date: Wed, 2 Apr 2025 11:13:04 -0400 Subject: [PATCH 01/11] Some tests pass --- machine/corpora/scripture_update_block.py | 45 +++++++ .../scripture_update_block_handler_base.py | 9 ++ ...date_block_handler_first_elements_first.py | 23 ++++ machine/corpora/scripture_update_element.py | 24 ++++ machine/corpora/update_usfm_parser_handler.py | 112 ++++++++++++------ 5 files changed, 178 insertions(+), 35 deletions(-) create mode 100644 machine/corpora/scripture_update_block.py create mode 100644 machine/corpora/scripture_update_block_handler_base.py create mode 100644 machine/corpora/scripture_update_block_handler_first_elements_first.py create mode 100644 machine/corpora/scripture_update_element.py diff --git a/machine/corpora/scripture_update_block.py b/machine/corpora/scripture_update_block.py new file mode 100644 index 00000000..00787cf2 --- /dev/null +++ b/machine/corpora/scripture_update_block.py @@ -0,0 +1,45 @@ +from __future__ import annotations + +from .scripture_ref import ScriptureRef +from .scripture_update_element import ScriptureUpdateElement, ScriptureUpdateElementType +from .usfm_token import UsfmToken, UsfmTokenType + + +class ScriptureUpdateBlock: + + def __init__(self) -> None: + self._ref: ScriptureRef = ScriptureRef() + self._elements: list[ScriptureUpdateElement] = [] + + def add_existing_text(self, token: UsfmToken, marked_for_removal: bool = False) -> None: + self._elements.append( + ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal) + ) + + def add_inserted_text(self, tokens: list[UsfmToken]) -> None: + self._elements.append(ScriptureUpdateElement(ScriptureUpdateElementType.INSERTED_TEXT, tokens.copy())) + + def add_token(self, token: UsfmToken, marked_for_removal: bool = False) -> None: + if token.type == UsfmTokenType.TEXT: + self._elements.append( + 
ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal) + ) + else: + self._elements.append(ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, [token], marked_for_removal)) + + def add_tokens(self, tokens: list[UsfmToken], marked_for_removal: bool = False) -> None: + if len(tokens) == 0: + return + self._elements.append( + ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, tokens.copy(), marked_for_removal) + ) + + def update_ref(self, ref: ScriptureRef) -> None: + self._ref = ref + + def clear(self) -> None: + self._elements.clear() + self._ref = ScriptureRef() + + def get_tokens(self) -> list[UsfmToken]: + return [token for element in self._elements for token in element.get_tokens()] diff --git a/machine/corpora/scripture_update_block_handler_base.py b/machine/corpora/scripture_update_block_handler_base.py new file mode 100644 index 00000000..2998a0d9 --- /dev/null +++ b/machine/corpora/scripture_update_block_handler_base.py @@ -0,0 +1,9 @@ +from __future__ import annotations + +from .scripture_update_block import ScriptureUpdateBlock + + +class ScriptureUpdateBlockHandlerBase: + + def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: + raise NotImplementedError("Must be implemented in subclass") diff --git a/machine/corpora/scripture_update_block_handler_first_elements_first.py b/machine/corpora/scripture_update_block_handler_first_elements_first.py new file mode 100644 index 00000000..17f44798 --- /dev/null +++ b/machine/corpora/scripture_update_block_handler_first_elements_first.py @@ -0,0 +1,23 @@ +from __future__ import annotations + +from .scripture_update_block import ScriptureUpdateBlock +from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase +from .scripture_update_element import ScriptureUpdateElementType + + +class ScriptureUpdateBlockHandlerFirstElementsFirst(ScriptureUpdateBlockHandlerBase): + + def process_block(self, block: ScriptureUpdateBlock) -> 
ScriptureUpdateBlock: + # If a paragraph, embed or style element occurs before existing text, move it before inserted text as well. + current_insert_index = 0 + for current_index in range(len(block._elements)): + element = block._elements[current_index] + if element.type == ScriptureUpdateElementType.EXISTING_TEXT: + # we found existing text, so we stop looking for elements to move + break + if current_index != current_insert_index and element.type != ScriptureUpdateElementType.INSERTED_TEXT: + block._elements.remove(element) + block._elements.insert(current_insert_index, element) + current_insert_index += 1 + + return block diff --git a/machine/corpora/scripture_update_element.py b/machine/corpora/scripture_update_element.py new file mode 100644 index 00000000..fe39d7e5 --- /dev/null +++ b/machine/corpora/scripture_update_element.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from dataclasses import dataclass +from enum import Enum, auto + +from .usfm_token import UsfmToken + + +class ScriptureUpdateElementType(Enum): + EXISTING_TEXT = auto() + INSERTED_TEXT = auto() + OTHER = auto() + + +@dataclass +class ScriptureUpdateElement: + type: ScriptureUpdateElementType + tokens: list[UsfmToken] + marked_for_removal: bool = False + + def get_tokens(self) -> list[UsfmToken]: + if self.marked_for_removal: + return [] + return self.tokens diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index b3ebe2be..c05989d9 100644 --- a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -1,8 +1,12 @@ from enum import Enum, auto from typing import List, Optional, Sequence, Tuple, Union +from ..scripture.verse_ref import VerseRef from .scripture_ref import ScriptureRef from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler +from .scripture_update_block import ScriptureUpdateBlock +from .scripture_update_block_handler_base import 
ScriptureUpdateBlockHandlerBase +from .scripture_update_block_handler_first_elements_first import ScriptureUpdateBlockHandlerFirstElementsFirst from .usfm_parser_state import UsfmParserState from .usfm_stylesheet import UsfmStylesheet from .usfm_token import UsfmAttribute, UsfmToken, UsfmTokenType @@ -31,13 +35,20 @@ def __init__( embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, preserve_paragraph_styles: Optional[Sequence[str]] = None, + update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None, ) -> None: super().__init__() self._rows = rows or [] self._tokens: List[UsfmToken] = [] - self._new_tokens: List[UsfmToken] = [] - self._new_embed_tokens: List[UsfmToken] = [] + self._updated_text: List[UsfmToken] = [] + self._updated_embed_text: List[UsfmToken] = [] + self._update_block: ScriptureUpdateBlock = ScriptureUpdateBlock() + self._embed_update_block: ScriptureUpdateBlock = ScriptureUpdateBlock() self._id_text = id_text + if update_block_handlers is None: + self._update_block_handlers = [ScriptureUpdateBlockHandlerFirstElementsFirst()] + else: + self._update_block_handlers = update_block_handlers if preserve_paragraph_styles is None: self._preserve_paragraph_styles = set(["r", "rem"]) elif isinstance(preserve_paragraph_styles, str): @@ -60,7 +71,7 @@ def tokens(self) -> List[UsfmToken]: def end_usfm(self, state: UsfmParserState) -> None: self._collect_tokens(state) - + self._process_update_block() super().end_usfm(state) def start_book(self, state: UsfmParserState, marker: str, code: str) -> None: @@ -68,13 +79,12 @@ def start_book(self, state: UsfmParserState, marker: str, code: str) -> None: start_book_tokens: List[UsfmToken] = [] if self._id_text is not None: start_book_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=self._id_text + " ")) - self._push_new_tokens(start_book_tokens) + self._update_block.add_tokens(start_book_tokens) 
super().start_book(state, marker, code) def end_book(self, state: UsfmParserState, marker: str) -> None: - self._pop_new_tokens() - + self._process_update_block() super().end_book(state, marker) def start_para( @@ -99,6 +109,7 @@ def start_para( super().start_para(state, marker, unknown, attributes) def end_para(self, state: UsfmParserState, marker: str) -> None: + self._process_update_block() super().end_para(state, marker) self._in_preserved_paragraph = False @@ -114,7 +125,7 @@ def start_cell(self, state: UsfmParserState, marker: str, align: str, colspan: i def end_cell(self, state: UsfmParserState, marker: str) -> None: self._collect_tokens(state) - + self._process_update_block() super().end_cell(state, marker) def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> None: @@ -125,6 +136,7 @@ def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None: if closed: self._collect_tokens(state) + self._process_update_block() super().end_sidebar(state, marker, closed) @@ -137,6 +149,7 @@ def chapter( pub_number: str, ) -> None: self._collect_tokens(state) + self._process_update_block() super().chapter(state, number, marker, alt_number, pub_number) @@ -148,6 +161,7 @@ def milestone( attributes: Sequence[UsfmAttribute], ) -> None: self._collect_tokens(state) + self._process_update_block() super().milestone(state, marker, start_milestone, attributes) @@ -160,6 +174,7 @@ def verse( pub_number: str, ) -> None: self._collect_tokens(state) + self._process_update_block() super().verse(state, number, marker, alt_number, pub_number) @@ -199,6 +214,7 @@ def _start_embed( state: UsfmParserState, scripture_ref: ScriptureRef, ) -> None: + self._embed_update_block.update_ref(scripture_ref) self._embed_row_texts = self._advance_rows([scripture_ref]) self._embed_updated = any(self._embed_row_texts) @@ -217,6 +233,7 @@ def _end_embed( else: 
self._collect_tokens(state) + self._process_embed_update_block() self._embed_row_texts.clear() self._embed_updated = False @@ -256,20 +273,20 @@ def unmatched(self, state: UsfmParserState, marker: str) -> None: def _start_verse_text(self, state: UsfmParserState, scripture_refs: Sequence[ScriptureRef]) -> None: row_texts: List[str] = self._advance_rows(scripture_refs) - self._push_new_tokens([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) + self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) def _end_verse_text(self, state: UsfmParserState, scripture_refs: Sequence[ScriptureRef]) -> None: self._pop_new_tokens() def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: row_texts = self._advance_rows([scripture_ref]) - self._push_new_tokens([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) + self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: self._pop_new_tokens() def _start_note_text(self, state: UsfmParserState) -> None: - self._push_new_embed_tokens([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in self._embed_row_texts]) + self._push_updated_embed_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in self._embed_row_texts]) def _end_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: self._embed_row_texts.clear() @@ -306,13 +323,15 @@ def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> List[str]: return row_texts def _collect_tokens(self, state: UsfmParserState) -> None: - self._tokens.extend(self._new_tokens) - self._new_tokens.clear() + self._use_updated_text() while self._token_index <= state.index + state.special_token_count: - self._tokens.append(state.tokens[self._token_index]) + self._update_block.add_token(state.tokens[self._token_index]) self._token_index += 1 def 
_skip_tokens(self, state: UsfmParserState) -> None: + while self._token_index <= state.index + state.special_token_count: + self._update_block.add_token(state.tokens[self._token_index], marked_for_removal=True) + self._token_index += 1 self._token_index = state.index + 1 + state.special_token_count def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True) -> bool: @@ -348,24 +367,24 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True) if use_new_tokens: if in_embed: - self._add_new_embed_tokens() + self._use_updated_embed_text() else: - self._add_new_tokens() + self._use_updated_text() if existing_text and ( self._text_behavior == UpdateUsfmTextBehavior.PREFER_EXISTING or self._is_in_preserved_paragraph(marker) ): if in_embed: - self._clear_new_embed_tokens() + self._clear_updated_embed_text() else: - self._clear_new_tokens() + self._clear_updated_text() embed_in_new_verse_text = ( any(self._replace_stack) or self._text_behavior == UpdateUsfmTextBehavior.STRIP_EXISTING ) and in_embed if embed_in_new_verse_text or self._embed_updated: if self._embed_behavior == UpdateUsfmMarkerBehavior.STRIP: - self._clear_new_embed_tokens() + self._clear_updated_embed_text() return True if not self._is_in_note_text() or in_nested_embed: return False @@ -380,33 +399,56 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True) def _has_new_text(self) -> bool: return any(self._replace_stack) and self._replace_stack[-1] - def _push_new_tokens(self, tokens: List[UsfmToken]) -> None: + def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None: + super()._update_verse_ref(verse_ref, marker) + self._update_block.update_ref(ScriptureRef(verse_ref.copy())) + + def _create_non_verse_ref(self) -> ScriptureRef: + ref = super()._create_non_verse_ref() + self._update_block.update_ref(ref) + return ref + + def _process_update_block(self) -> None: + self._use_updated_text() + for handler in 
self._update_block_handlers: + self._update_block = handler.process_block(self._update_block) + self._tokens.extend(self._update_block.get_tokens()) + self._update_block.clear() + + def _process_embed_update_block(self) -> None: + self._use_updated_embed_text() + for handler in self._update_block_handlers: + self._embed_update_block = handler.process_block(self._embed_update_block) + self._update_block.add_tokens(self._embed_update_block.get_tokens()) + self._embed_update_block.clear() + + def _push_updated_text(self, tokens: List[UsfmToken]) -> None: self._replace_stack.append(any(tokens)) if tokens: - self._new_tokens.extend(tokens) + self._updated_text.extend(tokens) - def _add_new_tokens(self) -> None: - if self._new_tokens: - self._tokens.extend(self._new_tokens) - self._new_tokens.clear() + def _use_updated_text(self) -> None: + if self._updated_text: + self._update_block.add_inserted_text(self._updated_text) + self._updated_text.clear() - def _clear_new_tokens(self) -> None: - self._new_tokens.clear() + def _clear_updated_text(self) -> None: + self._updated_text.clear() - def _push_new_embed_tokens(self, tokens: List[UsfmToken]) -> None: + def _push_updated_embed_text(self, tokens: List[UsfmToken]) -> None: self._replace_stack.append(any(tokens)) if tokens: - self._new_embed_tokens.extend(tokens) + self._updated_embed_text.extend(tokens) - def _add_new_embed_tokens(self) -> None: - if self._new_embed_tokens: - self._tokens.extend(self._new_embed_tokens) - self._new_embed_tokens.clear() + def _use_updated_embed_text(self) -> None: + if self._updated_embed_text: + self._embed_update_block.add_inserted_text(self._updated_embed_text) + self._updated_embed_text.clear() - def _clear_new_embed_tokens(self) -> None: - self._new_embed_tokens.clear() + def _clear_updated_embed_text(self) -> None: + self._updated_embed_text.clear() - def _push_token_as_previous(self) -> None: + def _push_updated_text_as_previous(self) -> None: 
self._replace_stack.append(self._replace_stack[-1]) def _pop_new_tokens(self) -> None: From 5b073250aabc9e174c1b86823b57b2388a014c7e Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 3 Apr 2025 12:50:46 -0400 Subject: [PATCH 02/11] Fix the tests --- machine/corpora/update_usfm_parser_handler.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index c05989d9..dd80c07a 100644 --- a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -46,7 +46,7 @@ def __init__( self._embed_update_block: ScriptureUpdateBlock = ScriptureUpdateBlock() self._id_text = id_text if update_block_handlers is None: - self._update_block_handlers = [ScriptureUpdateBlockHandlerFirstElementsFirst()] + self._update_block_handlers = [] else: self._update_block_handlers = update_block_handlers if preserve_paragraph_styles is None: @@ -79,7 +79,7 @@ def start_book(self, state: UsfmParserState, marker: str, code: str) -> None: start_book_tokens: List[UsfmToken] = [] if self._id_text is not None: start_book_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=self._id_text + " ")) - self._update_block.add_tokens(start_book_tokens) + self._push_updated_text(start_book_tokens) super().start_book(state, marker, code) @@ -109,7 +109,6 @@ def start_para( super().start_para(state, marker, unknown, attributes) def end_para(self, state: UsfmParserState, marker: str) -> None: - self._process_update_block() super().end_para(state, marker) self._in_preserved_paragraph = False @@ -125,7 +124,6 @@ def start_cell(self, state: UsfmParserState, marker: str, align: str, colspan: i def end_cell(self, state: UsfmParserState, marker: str) -> None: self._collect_tokens(state) - self._process_update_block() super().end_cell(state, marker) def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> None: @@ -136,7 +134,6 @@ def 
start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None: if closed: self._collect_tokens(state) - self._process_update_block() super().end_sidebar(state, marker, closed) @@ -148,8 +145,8 @@ def chapter( alt_number: str, pub_number: str, ) -> None: - self._collect_tokens(state) self._process_update_block() + self._collect_tokens(state) super().chapter(state, number, marker, alt_number, pub_number) @@ -160,8 +157,8 @@ def milestone( start_milestone: bool, attributes: Sequence[UsfmAttribute], ) -> None: - self._collect_tokens(state) self._process_update_block() + self._collect_tokens(state) super().milestone(state, marker, start_milestone, attributes) @@ -173,8 +170,8 @@ def verse( alt_number: str, pub_number: str, ) -> None: - self._collect_tokens(state) self._process_update_block() + self._collect_tokens(state) super().verse(state, number, marker, alt_number, pub_number) @@ -325,12 +322,20 @@ def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> List[str]: def _collect_tokens(self, state: UsfmParserState) -> None: self._use_updated_text() while self._token_index <= state.index + state.special_token_count: - self._update_block.add_token(state.tokens[self._token_index]) + token = state.tokens[self._token_index] + if self._is_in_embed(token.marker): + self._embed_update_block.add_token(token) + else: + self._update_block.add_token(token) self._token_index += 1 def _skip_tokens(self, state: UsfmParserState) -> None: while self._token_index <= state.index + state.special_token_count: - self._update_block.add_token(state.tokens[self._token_index], marked_for_removal=True) + token = state.tokens[self._token_index] + if self._is_in_embed(token.marker): + self._embed_update_block.add_token(token, marked_for_removal=True) + else: + self._update_block.add_token(token, marked_for_removal=True) self._token_index += 1 self._token_index = state.index + 1 + 
state.special_token_count From d963c7aa0ca0eae893587f2d309159746c9d3fc4 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 3 Apr 2025 13:14:24 -0400 Subject: [PATCH 03/11] I want to process the data in segments that correspond to individual translations. These updates make it happen. --- .vscode/settings.json | 3 +++ machine/corpora/usfm_parser_state.py | 4 ++++ tests/corpora/test_update_usfm_parser_handler.py | 3 +++ 3 files changed, 10 insertions(+) diff --git a/.vscode/settings.json b/.vscode/settings.json index fe6e784e..63beb3c1 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -13,6 +13,9 @@ "source.organizeImports": "explicit" }, }, + "files.associations": { + "*.SFM": "usfm", + }, "black-formatter.path": [ "poetry", "run", diff --git a/machine/corpora/usfm_parser_state.py b/machine/corpora/usfm_parser_state.py index 3f8b40f1..3d0b9e82 100644 --- a/machine/corpora/usfm_parser_state.py +++ b/machine/corpora/usfm_parser_state.py @@ -108,6 +108,10 @@ def is_verse_para(self) -> bool: @property def is_verse_text(self) -> bool: + # anything before verse 1 is not verse text + if self.verse_ref.verse_num == 0: + return False + # Sidebars and notes are not verse text if any(e.type in {UsfmElementType.SIDEBAR, UsfmElementType.NOTE} for e in self._stack): return False diff --git a/tests/corpora/test_update_usfm_parser_handler.py b/tests/corpora/test_update_usfm_parser_handler.py index 431e7e41..c6cf8cea 100644 --- a/tests/corpora/test_update_usfm_parser_handler.py +++ b/tests/corpora/test_update_usfm_parser_handler.py @@ -202,6 +202,7 @@ def test_paragraph_in_verse(): ] usfm = r"""\id MAT - Test \c 1 +\p paragraph not in a verse \v 1 verse 1 \p inner verse paragraph \s1 Section Header \v 2 Verse 2 \p inner verse paragraph @@ -211,6 +212,7 @@ def test_paragraph_in_verse(): result = r"""\id MAT - Test \c 1 +\p paragraph not in a verse \v 1 Update 1 \s1 Section Header \v 2 Verse 2 @@ -228,6 +230,7 @@ def test_paragraph_in_verse(): result_strip = 
r"""\id MAT \c 1 +\p \v 1 Update 1 \s1 \v 2 From 747120733cd9fe548fd5de2fd1ba65c5a7216cff Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 3 Apr 2025 13:39:14 -0400 Subject: [PATCH 04/11] Added more test framework --- .../paratext_project_text_updater_base.py | 4 + machine/corpora/update_usfm_parser_handler.py | 2 + .../test_update_scripture_block_updater.py | 119 ++++++++++++++++++ 3 files changed, 125 insertions(+) create mode 100644 tests/corpora/test_update_scripture_block_updater.py diff --git a/machine/corpora/paratext_project_text_updater_base.py b/machine/corpora/paratext_project_text_updater_base.py index a56f2db0..b284cb51 100644 --- a/machine/corpora/paratext_project_text_updater_base.py +++ b/machine/corpora/paratext_project_text_updater_base.py @@ -1,6 +1,8 @@ from abc import ABC, abstractmethod from typing import BinaryIO, Optional, Sequence, Tuple, Union +from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase + from ..utils.typeshed import StrPath from .paratext_project_settings import ParatextProjectSettings from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase @@ -26,6 +28,7 @@ def update_usfm( embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, preserve_paragraph_styles: Optional[Sequence[str]] = None, + update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None, ) -> Optional[str]: file_name: str = self._settings.get_book_file_name(book_id) if not self._exists(file_name): @@ -40,6 +43,7 @@ def update_usfm( embed_behavior, style_behavior, preserve_paragraph_styles, + update_block_handlers=update_block_handlers, ) try: parse_usfm(usfm, handler, self._settings.stylesheet, self._settings.versification) diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index dd80c07a..54856a2d 100644 --- 
a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -109,6 +109,8 @@ def start_para( super().start_para(state, marker, unknown, attributes) def end_para(self, state: UsfmParserState, marker: str) -> None: + if not state.is_verse_text: + self._process_update_block() super().end_para(state, marker) self._in_preserved_paragraph = False diff --git a/tests/corpora/test_update_scripture_block_updater.py b/tests/corpora/test_update_scripture_block_updater.py new file mode 100644 index 00000000..32d9057a --- /dev/null +++ b/tests/corpora/test_update_scripture_block_updater.py @@ -0,0 +1,119 @@ +from typing import List, Optional, Sequence, Tuple + +from machine.corpora.scripture_update_block_handler_first_elements_first import ( + ScriptureUpdateBlockHandlerFirstElementsFirst, +) + +from machine.corpora.scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase +from testutils.corpora_test_helpers import USFM_TEST_PROJECT_PATH + +from machine.corpora import ( + FileParatextProjectTextUpdater, + ScriptureRef, + UpdateUsfmMarkerBehavior, + UpdateUsfmParserHandler, + UpdateUsfmTextBehavior, + parse_usfm, +) + + +def test_preserve_paragraphs(): + rows = [ + (scr_ref("MAT 1:1"), str("U1")), + ( + scr_ref("MAT 1:1/1:f"), + str("UF1"), + ), + (scr_ref("MAT 1:2"), str("U2")), + ( + scr_ref("MAT 1:2/1:f"), + str("UF2"), + ), + (scr_ref("MAT 1:3"), str("U3")), + ( + scr_ref("MAT 1:3/1:f"), + str("UF3"), + ), + ] + usfm = r"""\id MAT +\c 1 +\v 1 \f \ft \fm ' \fm* hello world \f* it comes first +\v 2 it comes \f \ft hello \fm ' \fm* world \f* middling +\v 3 it comes last \f \ft hello world \fm ' \fm* \f* +""" + + target = update_usfm(rows, usfm) + result = r"""\id MAT +\c 1 +\v 1 U1 \f \ft UF1 \fm ' \fm*\f* +\v 2 U2 \f \ft UF2 \fm ' \fm*\f* +\v 3 U3 \f \ft UF3 \fm ' \fm*\f* +""" + + assess(target, result) + + target_first_element = update_usfm( + rows, usfm, 
update_block_handlers=[ScriptureUpdateBlockHandlerFirstElementsFirst()] + ) + result_first_element = r"""\id MAT +\c 1 +\v 1 \f \ft \fm ' \fm* UF1 \f* U1 +\v 2 U2 \f \ft UF2 \fm ' \fm*\f* +\v 3 U3 \f \ft UF3 \fm ' \fm*\f* +""" + assess(target_first_element, result_first_element) + + +def scr_ref(*refs: str) -> List[ScriptureRef]: + return [ScriptureRef.parse(ref) for ref in refs] + + +def update_usfm( + rows: Optional[Sequence[Tuple[Sequence[ScriptureRef], str]]] = None, + source: Optional[str] = None, + id_text: Optional[str] = None, + text_behavior: UpdateUsfmTextBehavior = UpdateUsfmTextBehavior.PREFER_NEW, + paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, + embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, + style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, + preserve_paragraph_styles: Optional[Sequence[str]] = None, + update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None, +) -> Optional[str]: + if source is None: + updater = FileParatextProjectTextUpdater(USFM_TEST_PROJECT_PATH) + return updater.update_usfm( + "MAT", + rows, + id_text, + text_behavior, + paragraph_behavior, + embed_behavior, + style_behavior, + preserve_paragraph_styles, + update_block_handlers, + ) + else: + source = source.strip().replace("\r\n", "\n") + "\r\n" + updater = UpdateUsfmParserHandler( + rows, + id_text, + text_behavior, + paragraph_behavior, + embed_behavior, + style_behavior, + preserve_paragraph_styles, + update_block_handlers, + ) + parse_usfm(source, updater) + return updater.get_usfm() + + +def assess(target: Optional[str], truth: str) -> None: + assert target is not None + for target_line, truth_line in zip(target.split("\n"), truth.split("\n")): + assert target_line.strip() == truth_line.strip() + + +def read_usfm() -> str: + with (USFM_TEST_PROJECT_PATH / "41MATTes.SFM").open("r", encoding="utf-8-sig", newline="\r\n") as file: + return file.read() From 
8a7c993c6220c00662a7eaf19358acdd6a55c31c Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 10 Apr 2025 13:33:40 -0400 Subject: [PATCH 05/11] Updates for reviewer comments --- README.md | 8 ++ .../paratext_project_text_updater_base.py | 4 +- machine/corpora/scripture_update_block.py | 4 + ...e.py => scripture_update_block_handler.py} | 3 +- ...date_block_handler_first_elements_first.py | 23 ---- machine/corpora/update_usfm_parser_handler.py | 5 +- .../test_update_scripture_block_updater.py | 119 ------------------ tests/corpora/test_usfm_file_text.py | 32 ++--- 8 files changed, 34 insertions(+), 164 deletions(-) rename machine/corpora/{scripture_update_block_handler_base.py => scripture_update_block_handler.py} (80%) delete mode 100644 machine/corpora/scripture_update_block_handler_first_elements_first.py delete mode 100644 tests/corpora/test_update_scripture_block_updater.py diff --git a/README.md b/README.md index 577e58d3..a11a7ea1 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,14 @@ Machine is available as a pip package: pip install sil-machine ``` +## setup + +You can use the devcontainer (normal process), or you can setup outside of one, especially if you don't have a GPU. 
+ +* Install poetry +* `poetry install` for everything +* `poetry install --without gpu` if you don't have a NVIDA gpu + ## Tutorials If you would like to find out more about how to use Machine, check out the tutorial Jupyter notebooks: diff --git a/machine/corpora/paratext_project_text_updater_base.py b/machine/corpora/paratext_project_text_updater_base.py index b284cb51..02b0566b 100644 --- a/machine/corpora/paratext_project_text_updater_base.py +++ b/machine/corpora/paratext_project_text_updater_base.py @@ -1,7 +1,7 @@ from abc import ABC, abstractmethod from typing import BinaryIO, Optional, Sequence, Tuple, Union -from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase +from .scripture_update_block_handler import ScriptureUpdateBlockHandler from ..utils.typeshed import StrPath from .paratext_project_settings import ParatextProjectSettings @@ -28,7 +28,7 @@ def update_usfm( embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, preserve_paragraph_styles: Optional[Sequence[str]] = None, - update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None, + update_block_handlers: Optional[list[ScriptureUpdateBlockHandler]] = None, ) -> Optional[str]: file_name: str = self._settings.get_book_file_name(book_id) if not self._exists(file_name): diff --git a/machine/corpora/scripture_update_block.py b/machine/corpora/scripture_update_block.py index 00787cf2..afb9e75a 100644 --- a/machine/corpora/scripture_update_block.py +++ b/machine/corpora/scripture_update_block.py @@ -11,6 +11,10 @@ def __init__(self) -> None: self._ref: ScriptureRef = ScriptureRef() self._elements: list[ScriptureUpdateElement] = [] + @property + def elements(self) -> list[ScriptureUpdateElement]: + return self._elements + def add_existing_text(self, token: UsfmToken, marked_for_removal: bool = False) -> None: self._elements.append( 
ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal) diff --git a/machine/corpora/scripture_update_block_handler_base.py b/machine/corpora/scripture_update_block_handler.py similarity index 80% rename from machine/corpora/scripture_update_block_handler_base.py rename to machine/corpora/scripture_update_block_handler.py index 2998a0d9..ff1d6f9e 100644 --- a/machine/corpora/scripture_update_block_handler_base.py +++ b/machine/corpora/scripture_update_block_handler.py @@ -1,9 +1,10 @@ from __future__ import annotations +from abc import ABC from .scripture_update_block import ScriptureUpdateBlock -class ScriptureUpdateBlockHandlerBase: +class ScriptureUpdateBlockHandler(ABC): def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: raise NotImplementedError("Must be implemented in subclass") diff --git a/machine/corpora/scripture_update_block_handler_first_elements_first.py b/machine/corpora/scripture_update_block_handler_first_elements_first.py deleted file mode 100644 index 17f44798..00000000 --- a/machine/corpora/scripture_update_block_handler_first_elements_first.py +++ /dev/null @@ -1,23 +0,0 @@ -from __future__ import annotations - -from .scripture_update_block import ScriptureUpdateBlock -from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase -from .scripture_update_element import ScriptureUpdateElementType - - -class ScriptureUpdateBlockHandlerFirstElementsFirst(ScriptureUpdateBlockHandlerBase): - - def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: - # If a paragraph, embed or style element occurs before existing text, move it before inserted text as well. 
- current_insert_index = 0 - for current_index in range(len(block._elements)): - element = block._elements[current_index] - if element.type == ScriptureUpdateElementType.EXISTING_TEXT: - # we found existing text, so we stop looking for elements to move - break - if current_index != current_insert_index and element.type != ScriptureUpdateElementType.INSERTED_TEXT: - block._elements.remove(element) - block._elements.insert(current_insert_index, element) - current_insert_index += 1 - - return block diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index 54856a2d..ecdf0881 100644 --- a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -5,8 +5,7 @@ from .scripture_ref import ScriptureRef from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler from .scripture_update_block import ScriptureUpdateBlock -from .scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase -from .scripture_update_block_handler_first_elements_first import ScriptureUpdateBlockHandlerFirstElementsFirst +from .scripture_update_block_handler import ScriptureUpdateBlockHandler from .usfm_parser_state import UsfmParserState from .usfm_stylesheet import UsfmStylesheet from .usfm_token import UsfmAttribute, UsfmToken, UsfmTokenType @@ -35,7 +34,7 @@ def __init__( embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, preserve_paragraph_styles: Optional[Sequence[str]] = None, - update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None, + update_block_handlers: Optional[list[ScriptureUpdateBlockHandler]] = None, ) -> None: super().__init__() self._rows = rows or [] diff --git a/tests/corpora/test_update_scripture_block_updater.py b/tests/corpora/test_update_scripture_block_updater.py deleted file mode 100644 index 32d9057a..00000000 --- 
a/tests/corpora/test_update_scripture_block_updater.py +++ /dev/null @@ -1,119 +0,0 @@ -from typing import List, Optional, Sequence, Tuple - -from machine.corpora.scripture_update_block_handler_first_elements_first import ( - ScriptureUpdateBlockHandlerFirstElementsFirst, -) - -from machine.corpora.scripture_update_block_handler_base import ScriptureUpdateBlockHandlerBase -from testutils.corpora_test_helpers import USFM_TEST_PROJECT_PATH - -from machine.corpora import ( - FileParatextProjectTextUpdater, - ScriptureRef, - UpdateUsfmMarkerBehavior, - UpdateUsfmParserHandler, - UpdateUsfmTextBehavior, - parse_usfm, -) - - -def test_preserve_paragraphs(): - rows = [ - (scr_ref("MAT 1:1"), str("U1")), - ( - scr_ref("MAT 1:1/1:f"), - str("UF1"), - ), - (scr_ref("MAT 1:2"), str("U2")), - ( - scr_ref("MAT 1:2/1:f"), - str("UF2"), - ), - (scr_ref("MAT 1:3"), str("U3")), - ( - scr_ref("MAT 1:3/1:f"), - str("UF3"), - ), - ] - usfm = r"""\id MAT -\c 1 -\v 1 \f \ft \fm ' \fm* hello world \f* it comes first -\v 2 it comes \f \ft hello \fm ' \fm* world \f* middling -\v 3 it comes last \f \ft hello world \fm ' \fm* \f* -""" - - target = update_usfm(rows, usfm) - result = r"""\id MAT -\c 1 -\v 1 U1 \f \ft UF1 \fm ' \fm*\f* -\v 2 U2 \f \ft UF2 \fm ' \fm*\f* -\v 3 U3 \f \ft UF3 \fm ' \fm*\f* -""" - - assess(target, result) - - target_first_element = update_usfm( - rows, usfm, update_block_handlers=[ScriptureUpdateBlockHandlerFirstElementsFirst()] - ) - result_first_element = r"""\id MAT -\c 1 -\v 1 \f \ft \fm ' \fm* UF1 \f* U1 -\v 2 U2 \f \ft UF2 \fm ' \fm*\f* -\v 3 U3 \f \ft UF3 \fm ' \fm*\f* -""" - assess(target_first_element, result_first_element) - - -def scr_ref(*refs: str) -> List[ScriptureRef]: - return [ScriptureRef.parse(ref) for ref in refs] - - -def update_usfm( - rows: Optional[Sequence[Tuple[Sequence[ScriptureRef], str]]] = None, - source: Optional[str] = None, - id_text: Optional[str] = None, - text_behavior: UpdateUsfmTextBehavior = UpdateUsfmTextBehavior.PREFER_NEW, - 
paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, - embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, - style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, - preserve_paragraph_styles: Optional[Sequence[str]] = None, - update_block_handlers: Optional[list[ScriptureUpdateBlockHandlerBase]] = None, -) -> Optional[str]: - if source is None: - updater = FileParatextProjectTextUpdater(USFM_TEST_PROJECT_PATH) - return updater.update_usfm( - "MAT", - rows, - id_text, - text_behavior, - paragraph_behavior, - embed_behavior, - style_behavior, - preserve_paragraph_styles, - update_block_handlers, - ) - else: - source = source.strip().replace("\r\n", "\n") + "\r\n" - updater = UpdateUsfmParserHandler( - rows, - id_text, - text_behavior, - paragraph_behavior, - embed_behavior, - style_behavior, - preserve_paragraph_styles, - update_block_handlers, - ) - parse_usfm(source, updater) - return updater.get_usfm() - - -def assess(target: Optional[str], truth: str) -> None: - assert target is not None - for target_line, truth_line in zip(target.split("\n"), truth.split("\n")): - assert target_line.strip() == truth_line.strip() - - -def read_usfm() -> str: - with (USFM_TEST_PROJECT_PATH / "41MATTes.SFM").open("r", encoding="utf-8-sig", newline="\r\n") as file: - return file.read() diff --git a/tests/corpora/test_usfm_file_text.py b/tests/corpora/test_usfm_file_text.py index 44451708..e046d71d 100644 --- a/tests/corpora/test_usfm_file_text.py +++ b/tests/corpora/test_usfm_file_text.py @@ -66,7 +66,7 @@ def test_get_rows_nonempty_text_all_text() -> None: assert text is not None rows = list(text) - assert len(rows) == 50 + assert len(rows) == 52 assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/1:h", corpus.versification) assert rows[0].text == "Matthew" @@ -113,20 +113,20 @@ def test_get_rows_nonempty_text_all_text() -> None: assert scripture_ref(rows[24]) == ScriptureRef.parse("MAT 2:0/4:p", 
corpus.versification) assert not rows[24].text - assert scripture_ref(rows[26]) == ScriptureRef.parse("MAT 2:1/1:f", corpus.versification) - assert rows[26].text == "This is a footnote." + assert scripture_ref(rows[27]) == ScriptureRef.parse("MAT 2:1/1:f", corpus.versification) + assert rows[27].text == "This is a footnote." - assert scripture_ref(rows[29]) == ScriptureRef.parse("MAT 2:3/2:esb/1:ms", corpus.versification) - assert rows[29].text == "This is a sidebar" + assert scripture_ref(rows[30]) == ScriptureRef.parse("MAT 2:3/2:esb/1:ms", corpus.versification) + assert rows[30].text == "This is a sidebar" - assert scripture_ref(rows[30]) == ScriptureRef.parse("MAT 2:3/2:esb/2:p", corpus.versification) - assert rows[30].text == "Here is some sidebar content." + assert scripture_ref(rows[31]) == ScriptureRef.parse("MAT 2:3/2:esb/2:p", corpus.versification) + assert rows[31].text == "Here is some sidebar content." - assert scripture_ref(rows[36]) == ScriptureRef.parse("MAT 2:7a/1:s", corpus.versification) - assert rows[36].text == "Section header" + assert scripture_ref(rows[37]) == ScriptureRef.parse("MAT 2:7a/1:s", corpus.versification) + assert rows[37].text == "Section header" - assert scripture_ref(rows[43]) == ScriptureRef.parse("MAT 2:12/1:restore", corpus.versification) - assert rows[43].text == "restore information" + assert scripture_ref(rows[44]) == ScriptureRef.parse("MAT 2:12/1:restore", corpus.versification) + assert rows[44].text == "restore information" def test_get_rows_sentence_start() -> None: @@ -220,7 +220,7 @@ def test_get_rows_include_markers_all_text() -> None: assert text is not None rows = list(text) - assert len(rows) == 46 + assert len(rows) == 48 assert scripture_ref(rows[2]) == ScriptureRef.parse("MAT 1:0/3:ip", corpus.versification) assert rows[2].text == "An introduction to Matthew\\fe + \\ft This is an endnote.\\fe*" @@ -240,11 +240,11 @@ def test_get_rows_include_markers_all_text() -> None: assert scripture_ref(rows[20]) == 
ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification) assert rows[20].text == "Chapter \\it Two \\it*" - assert scripture_ref(rows[22]) == ScriptureRef.parse("MAT 2:1", corpus.versification) - assert rows[22].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one." + assert scripture_ref(rows[23]) == ScriptureRef.parse("MAT 2:1", corpus.versification) + assert rows[23].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one." - assert scripture_ref(rows[26]) == ScriptureRef.parse("MAT 2:3/2:esb/2:p", corpus.versification) - assert rows[26].text == "Here is some sidebar // content." + assert scripture_ref(rows[27]) == ScriptureRef.parse("MAT 2:3/2:esb/2:p", corpus.versification) + assert rows[27].text == "Here is some sidebar // content." def test_get_rows_invalid_id() -> None: From 92ee88e37a915a70c7e05f4bae336f6c6bf2d934 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Thu, 10 Apr 2025 14:24:53 -0400 Subject: [PATCH 06/11] linting --- .github/workflows/ci.yml | 2 +- README.md | 8 - .../paratext_project_terms_parser_base.py | 6 +- .../paratext_project_text_updater_base.py | 3 +- .../corpora/scripture_update_block_handler.py | 1 + .../zip_paratext_project_terms_parser.py | 2 +- poetry.lock | 339 +++++++++++++++--- pyproject.toml | 2 +- .../test_update_usfm_parser_handler.py | 2 +- 9 files changed, 308 insertions(+), 57 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 380c8e10..044a4db5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: node-version: "14" - name: Lint with pyright run: | - npm install -g pyright@1.1.386 + npm install -g pyright@1.1.399 poetry run pyright - name: Test with pytest run: poetry run pytest --cov --cov-report=xml diff --git a/README.md b/README.md index a11a7ea1..577e58d3 100644 --- a/README.md +++ b/README.md @@ -10,14 +10,6 @@ Machine is available as a pip package: pip install sil-machine ``` 
-## setup - -You can use the devcontainer (normal process), or you can setup outside of one, especially if you don't have a GPU. - -* Install poetry -* `poetry install` for everything -* `poetry install --without gpu` if you don't have a NVIDA gpu - ## Tutorials If you would like to find out more about how to use Machine, check out the tutorial Jupyter notebooks: diff --git a/machine/corpora/paratext_project_terms_parser_base.py b/machine/corpora/paratext_project_terms_parser_base.py index ea71ab80..00496443 100644 --- a/machine/corpora/paratext_project_terms_parser_base.py +++ b/machine/corpora/paratext_project_terms_parser_base.py @@ -45,7 +45,7 @@ def parse(self, term_categories: Sequence[str], use_term_glosses: bool = True) - else: term_id_to_category_dict = {} - terms_glosses_doc: Optional[ElementTree.ElementTree] = None + terms_glosses_doc: Optional[ElementTree.ElementTree[ElementTree.Element]] = None resource_name = None if self._settings.language_code is not None: resource_name = _SUPPORTED_LANGUAGE_TERMS_LOCALIZATION_XMLS.get(self._settings.language_code) @@ -57,7 +57,7 @@ def parse(self, term_categories: Sequence[str], use_term_glosses: bool = True) - with open_binary(_SUPPORTED_LANGUAGE_TERMS_LOCALIZATION_XMLS_PACKAGE, resource_name) as stream: terms_glosses_doc = ElementTree.parse(stream) - term_renderings_doc: Optional[ElementTree.ElementTree] = None + term_renderings_doc: Optional[ElementTree.ElementTree[ElementTree.Element]] = None if self._exists("TermRenderings.xml"): with self._open("TermRenderings.xml") as stream: term_renderings_doc = ElementTree.parse(stream) @@ -136,7 +136,7 @@ def _strip_parens(term_string: str, left: str = "(", right: str = ")") -> str: return term_string -def _get_category_per_id(biblical_terms_doc: ElementTree.ElementTree) -> Dict[str, str]: +def _get_category_per_id(biblical_terms_doc: ElementTree.ElementTree[ElementTree.Element]) -> Dict[str, str]: term_id_to_category_dict: Dict[str, str] = {} for term in 
biblical_terms_doc.findall(".//Term"): diff --git a/machine/corpora/paratext_project_text_updater_base.py b/machine/corpora/paratext_project_text_updater_base.py index 02b0566b..8ba806a8 100644 --- a/machine/corpora/paratext_project_text_updater_base.py +++ b/machine/corpora/paratext_project_text_updater_base.py @@ -1,12 +1,11 @@ from abc import ABC, abstractmethod from typing import BinaryIO, Optional, Sequence, Tuple, Union -from .scripture_update_block_handler import ScriptureUpdateBlockHandler - from ..utils.typeshed import StrPath from .paratext_project_settings import ParatextProjectSettings from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase from .scripture_ref import ScriptureRef +from .scripture_update_block_handler import ScriptureUpdateBlockHandler from .update_usfm_parser_handler import UpdateUsfmMarkerBehavior, UpdateUsfmParserHandler, UpdateUsfmTextBehavior from .usfm_parser import parse_usfm diff --git a/machine/corpora/scripture_update_block_handler.py b/machine/corpora/scripture_update_block_handler.py index ff1d6f9e..bcbe8fb8 100644 --- a/machine/corpora/scripture_update_block_handler.py +++ b/machine/corpora/scripture_update_block_handler.py @@ -1,4 +1,5 @@ from __future__ import annotations + from abc import ABC from .scripture_update_block import ScriptureUpdateBlock diff --git a/machine/corpora/zip_paratext_project_terms_parser.py b/machine/corpora/zip_paratext_project_terms_parser.py index 3f781b21..ebc208a0 100644 --- a/machine/corpora/zip_paratext_project_terms_parser.py +++ b/machine/corpora/zip_paratext_project_terms_parser.py @@ -19,5 +19,5 @@ def _exists(self, file_name: StrPath) -> bool: def _open(self, file_name: StrPath) -> Optional[BinaryIO]: if file_name in self._archive.namelist(): - return BytesIO(self._archive.read(file_name)) + return BytesIO(self._archive.read(str(file_name))) return None diff --git a/poetry.lock b/poetry.lock index 4d8ded6e..a290b5dd 100644 --- a/poetry.lock +++ b/poetry.lock 
@@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. +# This file is automatically @generated by Poetry 2.1.2 and should not be changed by hand. [[package]] name = "accelerate" @@ -6,6 +6,8 @@ version = "0.26.1" description = "Accelerate" optional = false python-versions = ">=3.8.0" +groups = ["gpu"] +markers = "sys_platform == \"win32\" or sys_platform == \"linux\"" files = [ {file = "accelerate-0.26.1-py3-none-any.whl", hash = "sha256:04df826b84ac7bad8a0a8ab90e6aeacdecb1ea5a2d744d7e94f6735c29183227"}, {file = "accelerate-0.26.1.tar.gz", hash = "sha256:bf63716b6bd9460d87da970cf4d833abb824ca0aa633be36b741e63a1b504f89"}, @@ -36,6 +38,8 @@ version = "2.4.3" description = "Happy Eyeballs for asyncio" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, {file = "aiohappyeyeballs-2.4.3.tar.gz", hash = "sha256:75cf88a15106a5002a8eb1dab212525c00d1f4c0fa96e551c9fbe6f09a621586"}, @@ -47,6 +51,8 @@ version = "3.10.10" description = "Async http client/server framework (asyncio)" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:7b06b7843929e41a94ea09eb1ce3927865387e3e23ebe108e0d0d09b08d25be9"}, @@ -151,7 +157,7 @@ multidict = ">=4.5,<7.0" yarl = ">=1.12.0,<2.0" [package.extras] -speedups = ["Brotli", "aiodns (>=3.2.0)", "brotlicffi"] +speedups = ["Brotli ; platform_python_implementation == \"CPython\"", "aiodns (>=3.2.0) ; sys_platform == \"linux\" or sys_platform == \"darwin\"", "brotlicffi ; platform_python_implementation != \"CPython\""] [[package]] 
name = "aiosignal" @@ -159,6 +165,8 @@ version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, {file = "aiosignal-1.3.1.tar.gz", hash = "sha256:54cd96e15e1649b75d6c87526a6ff0b6c1b0dd3459f43d9ca11d48c339b68cfc"}, @@ -173,6 +181,7 @@ version = "4.6.2.post1" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "anyio-4.6.2.post1-py3-none-any.whl", hash = "sha256:6d170c36fba3bdd840c73d3868c1e777e33676a69c3a72cf0a0d5d6d8009b61d"}, {file = "anyio-4.6.2.post1.tar.gz", hash = "sha256:4c8bc31ccdb51c7f7bd251f51c609e038d63e34219b44aa86e47576389880b4c"}, @@ -186,7 +195,7 @@ typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1)", "uvloop (>=0.21.0b1)"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "truststore (>=0.9.1) ; python_version >= \"3.10\"", "uvloop (>=0.21.0b1) ; platform_python_implementation == \"CPython\" and platform_system != \"Windows\""] trio = ["trio (>=0.26.1)"] [[package]] @@ -195,6 +204,8 @@ version = "0.1.4" description = "Disable App Nap on macOS >= 10.9" optional = false python-versions = ">=3.6" +groups = ["dev"] +markers = "platform_system == \"Darwin\"" files = [ {file = "appnope-0.1.4-py2.py3-none-any.whl", hash 
= "sha256:502575ee11cd7a28c0205f379b525beefebab9d161b7c964670864014ed7213c"}, {file = "appnope-0.1.4.tar.gz", hash = "sha256:1de3860566df9caf38f01f86f65e0e13e379af54f9e4bee1e66b48f2efffd1ee"}, @@ -206,6 +217,7 @@ version = "23.1.0" description = "Argon2 for Python" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "argon2_cffi-23.1.0-py3-none-any.whl", hash = "sha256:c670642b78ba29641818ab2e68bd4e6a78ba53b7eff7b4c3815ae16abf91c7ea"}, {file = "argon2_cffi-23.1.0.tar.gz", hash = "sha256:879c3e79a2729ce768ebb7d36d4609e3a78a4ca2ec3a9f12286ca057e3d0db08"}, @@ -226,6 +238,7 @@ version = "21.2.0" description = "Low-level CFFI bindings for Argon2" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "argon2-cffi-bindings-21.2.0.tar.gz", hash = "sha256:bb89ceffa6c791807d1305ceb77dbfacc5aa499891d2c55661c6459651fc39e3"}, {file = "argon2_cffi_bindings-21.2.0-cp36-abi3-macosx_10_9_x86_64.whl", hash = "sha256:ccb949252cb2ab3a08c02024acb77cfb179492d5701c7cbdbfd776124d4d2367"}, @@ -263,6 +276,7 @@ version = "1.3.0" description = "Better dates & times for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80"}, {file = "arrow-1.3.0.tar.gz", hash = "sha256:d4540617648cb5f895730f1ad8c82a65f2dad0166f57b75f3ca54759c4d67a85"}, @@ -282,6 +296,7 @@ version = "2.4.1" description = "Annotate AST trees with source code positions" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "asttokens-2.4.1-py2.py3-none-any.whl", hash = "sha256:051ed49c3dcae8913ea7cd08e46a606dba30b79993209636c4875bc1d637bc24"}, {file = "asttokens-2.4.1.tar.gz", hash = "sha256:b03869718ba9a6eb027e134bfdf69f38a236d681c83c160d510768af11254ba0"}, @@ -291,8 +306,8 @@ files = [ six = ">=1.12.0" [package.extras] -astroid = ["astroid (>=1,<2)", "astroid (>=2,<4)"] -test = ["astroid (>=1,<2)", "astroid 
(>=2,<4)", "pytest"] +astroid = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\""] +test = ["astroid (>=1,<2) ; python_version < \"3\"", "astroid (>=2,<4) ; python_version >= \"3\"", "pytest"] [[package]] name = "async-lru" @@ -300,6 +315,7 @@ version = "2.0.4" description = "Simple LRU cache for asyncio" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "async-lru-2.0.4.tar.gz", hash = "sha256:b8a59a5df60805ff63220b2a0c5b5393da5521b113cd5465a44eb037d81a5627"}, {file = "async_lru-2.0.4-py3-none-any.whl", hash = "sha256:ff02944ce3c288c5be660c42dbcca0742b32c3b279d6dceda655190240b99224"}, @@ -314,6 +330,8 @@ version = "4.0.3" description = "Timeout context manager for asyncio programs" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"huggingface\" and python_version < \"3.11\"" files = [ {file = "async-timeout-4.0.3.tar.gz", hash = "sha256:4640d96be84d82d02ed59ea2b7105a0f7b33abe8703703cd0ab0bf87c427522f"}, {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, @@ -325,18 +343,20 @@ version = "24.2.0" description = "Classes Without Boilerplate" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, {file = "attrs-24.2.0.tar.gz", hash = "sha256:5cfb1b9148b5b086569baec03f20d7b6bf3bcacc9a42bebf87ffaaca362f6346"}, ] +markers = {main = "extra == \"huggingface\" or extra == \"jobs\""} [package.extras] -benchmark = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -cov = ["cloudpickle", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -dev = ["cloudpickle", "hypothesis", 
"mypy (>=1.11.1)", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] +benchmark = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-codspeed", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +cov = ["cloudpickle ; platform_python_implementation == \"CPython\"", "coverage[toml] (>=5.3)", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +dev = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] docs = ["cogapp", "furo", "myst-parser", "sphinx", "sphinx-notfound-page", "sphinxcontrib-towncrier", "towncrier (<24.7)"] -tests = ["cloudpickle", "hypothesis", "mypy (>=1.11.1)", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "pytest-xdist[psutil]"] -tests-mypy = ["mypy (>=1.11.1)", "pytest-mypy-plugins"] +tests = ["cloudpickle ; platform_python_implementation == \"CPython\"", "hypothesis", "mypy (>=1.11.1) ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\"", "pytest-xdist[psutil]"] +tests-mypy = ["mypy (>=1.11.1) ; 
platform_python_implementation == \"CPython\" and python_version >= \"3.9\"", "pytest-mypy-plugins ; platform_python_implementation == \"CPython\" and python_version >= \"3.9\" and python_version < \"3.13\""] [[package]] name = "babel" @@ -344,6 +364,7 @@ version = "2.16.0" description = "Internationalization utilities" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b"}, {file = "babel-2.16.0.tar.gz", hash = "sha256:d1f3554ca26605fe173f3de0c65f750f5a42f924499bf134de6423582298e316"}, @@ -358,6 +379,7 @@ version = "4.12.3" description = "Screen-scraping library" optional = false python-versions = ">=3.6.0" +groups = ["dev"] files = [ {file = "beautifulsoup4-4.12.3-py3-none-any.whl", hash = "sha256:b80878c9f40111313e55da8ba20bdba06d8fa3969fc68304167741bbf9e082ed"}, {file = "beautifulsoup4-4.12.3.tar.gz", hash = "sha256:74e3d1928edc070d21748185c46e3fb33490f22f52a3addee9aee0f4f7781051"}, @@ -379,6 +401,7 @@ version = "24.10.0" description = "The uncompromising code formatter." optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "black-24.10.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:e6668650ea4b685440857138e5fe40cde4d652633b1bdffc62933d0db4ed9812"}, {file = "black-24.10.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1c536fcf674217e87b8cc3657b81809d3c085d7bf3ef262ead700da345bfa6ea"}, @@ -425,6 +448,7 @@ version = "6.2.0" description = "An easy safelist-based HTML-sanitizing tool." 
optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "bleach-6.2.0-py3-none-any.whl", hash = "sha256:117d9c6097a7c3d22fd578fcd8d35ff1e125df6736f554da4e432fdd63f31e5e"}, {file = "bleach-6.2.0.tar.gz", hash = "sha256:123e894118b8a599fd80d3ec1a6d4cc7ce4e5882b1317a7e1ba69b56e95f991f"}, @@ -442,6 +466,7 @@ version = "1.35.51" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "boto3-1.35.51-py3-none-any.whl", hash = "sha256:c922f6a18958af9d8af0489d6d8503b517029d8159b26aa4859a8294561c72e9"}, {file = "boto3-1.35.51.tar.gz", hash = "sha256:a57c6c7012ecb40c43e565a6f7a891f39efa990ff933eab63cd456f7501c2731"}, @@ -461,6 +486,7 @@ version = "1.35.51" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "botocore-1.35.51-py3-none-any.whl", hash = "sha256:4d65b00111bd12b98e9f920ecab602cf619cc6a6d0be6e5dd53f517e4b92901c"}, {file = "botocore-1.35.51.tar.gz", hash = "sha256:a9b3d1da76b3e896ad74605c01d88f596324a3337393d4bfbfa0d6c35822ca9c"}, @@ -483,10 +509,12 @@ version = "2024.8.30" description = "Python package for providing Mozilla's CA Bundle." optional = false python-versions = ">=3.6" +groups = ["main", "dev", "gpu"] files = [ {file = "certifi-2024.8.30-py3-none-any.whl", hash = "sha256:922820b53db7a7257ffbda3f597266d435245903d80737e34f8a45ff3e3230d8"}, {file = "certifi-2024.8.30.tar.gz", hash = "sha256:bec941d2aa8195e248a60b31ff9f0558284cf01a52591ceda73ea9afffd69fd9"}, ] +markers = {main = "extra == \"huggingface\" or extra == \"jobs\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [[package]] name = "cffi" @@ -494,6 +522,7 @@ version = "1.17.1" description = "Foreign Function Interface for Python calling C code." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, {file = "cffi-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8f2cdc858323644ab277e9bb925ad72ae0e67f69e804f4898c070998d50b1a67"}, @@ -573,6 +602,7 @@ version = "2.1.1" description = "The Real First Universal Charset Detector. Open, modern and actively maintained alternative to Chardet." optional = false python-versions = ">=3.6.0" +groups = ["main", "dev", "gpu"] files = [ {file = "charset-normalizer-2.1.1.tar.gz", hash = "sha256:5a3d016c7c547f69d6f81fb0db9449ce888b418b5b9952cc5e6e66843e9dd845"}, {file = "charset_normalizer-2.1.1-py3-none-any.whl", hash = "sha256:83e9a75d1911279afd89352c68b45348559d1fc0506b054b346651b5e7fee29f"}, @@ -587,6 +617,8 @@ version = "1.16.5" description = "ClearML - Auto-Magical Experiment Manager, Version Control, and MLOps for AI" optional = false python-versions = "*" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "clearml-1.16.5-py2.py3-none-any.whl", hash = "sha256:3caa00914e039cb2b62ca90795c3ca17077042ae1edcefc17bf13f695653480f"}, ] @@ -620,10 +652,12 @@ version = "8.1.7" description = "Composable command line interface toolkit" optional = false python-versions = ">=3.7" +groups = ["main", "dev"] files = [ {file = "click-8.1.7-py3-none-any.whl", hash = "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28"}, {file = "click-8.1.7.tar.gz", hash = "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de"}, ] +markers = {main = "extra == \"huggingface\""} [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -634,10 +668,12 @@ version = "0.4.6" description = "Cross-platform colored terminal text." 
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["main", "dev", "gpu"] files = [ {file = "colorama-0.4.6-py2.py3-none-any.whl", hash = "sha256:4f1d9991f5acc0ca119f9d443620b77f9d6b33703e51011c16baf57afb285fc6"}, {file = "colorama-0.4.6.tar.gz", hash = "sha256:08695f5cb7ed6e0531a20572697297273c47b8cae5a63ffc6d6ed5c201be6e44"}, ] +markers = {main = "extra == \"huggingface\" and platform_system == \"Windows\"", dev = "platform_system == \"Windows\" or sys_platform == \"win32\"", gpu = "(sys_platform == \"win32\" or sys_platform == \"linux\") and platform_system == \"Windows\""} [[package]] name = "comm" @@ -645,6 +681,7 @@ version = "0.2.2" description = "Jupyter Python Comm implementation, for usage in ipykernel, xeus-python etc." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "comm-0.2.2-py3-none-any.whl", hash = "sha256:e6fb86cb70ff661ee8c9c14e7d36d6de3b4066f1441be4063df9c5009f0a64d3"}, {file = "comm-0.2.2.tar.gz", hash = "sha256:3fd7a84065306e07bea1773df6eb8282de51ba82f77c72f9c85716ab11fe980e"}, @@ -662,6 +699,7 @@ version = "7.6.4" description = "Code coverage measurement for Python" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "coverage-7.6.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:5f8ae553cba74085db385d489c7a792ad66f7f9ba2ee85bfa508aeb84cf0ba07"}, {file = "coverage-7.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8165b796df0bd42e10527a3f493c592ba494f16ef3c8b531288e3d0d72c1f6f0"}, @@ -731,7 +769,7 @@ files = [ tomli = {version = "*", optional = true, markers = "python_full_version <= \"3.11.0a6\" and extra == \"toml\""} [package.extras] -toml = ["tomli"] +toml = ["tomli ; python_full_version <= \"3.11.0a6\""] [[package]] name = "cython" @@ -812,6 +850,8 @@ version = "2.21.0" description = "HuggingFace community-driven open-source library of datasets" optional = false python-versions = ">=3.8.0" +groups = ["main"] 
+markers = "extra == \"huggingface\"" files = [ {file = "datasets-2.21.0-py3-none-any.whl", hash = "sha256:25e4e097110ce28824b746a107727ada94024cba11db8bc588d468414692b65a"}, {file = "datasets-2.21.0.tar.gz", hash = "sha256:998f85a8460f1bd982e5bd058f8a0808eef424249e3df1e8cdd594ccd0dc8ba2"}, @@ -835,9 +875,9 @@ xxhash = "*" [package.extras] apache-beam = ["apache-beam (>=2.26.0)"] -audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0)"] +audio = ["librosa", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\""] benchmarks = ["tensorflow (==2.12.0)", "torch (==2.0.1)", "transformers (==4.30.1)"] -dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch", "torch (>=2.0.0)", "transformers", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"] +dev = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "ruff (>=0.3.0)", "s3fs", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0)", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch", "torch (>=2.0.0)", 
"transformers", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"] docs = ["s3fs", "tensorflow (>=2.6.0)", "torch", "transformers"] jax = ["jax (>=0.3.14)", "jaxlib (>=0.3.14)"] metrics-tests = ["Werkzeug (>=1.0.1)", "accelerate", "bert-score (>=0.3.6)", "jiwer", "langdetect", "mauve-text", "nltk (<3.8.2)", "requests-file (>=1.5.1)", "rouge-score", "sacrebleu", "sacremoses", "scikit-learn", "scipy", "sentencepiece", "seqeval", "six (>=1.15.0,<1.16.0)", "spacy (>=3.0.0)", "texttable (>=1.6.3)", "tldextract", "tldextract (>=3.1.0)", "toml (>=0.10.1)", "typer (<0.5.0)"] @@ -845,8 +885,8 @@ quality = ["ruff (>=0.3.0)"] s3 = ["s3fs"] tensorflow = ["tensorflow (>=2.6.0)"] tensorflow-gpu = ["tensorflow (>=2.6.0)"] -tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tensorflow (>=2.16.0)", "tensorflow (>=2.6.0)", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"] -tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14)", "jaxlib (>=0.3.14)", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0)", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "typing-extensions (>=4.6.1)", "zstandard"] +tests = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "faiss-cpu (>=1.8.0.post1)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; 
sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tensorflow (>=2.16.0) ; python_version >= \"3.10\"", "tensorflow (>=2.6.0) ; python_version < \"3.10\"", "tiktoken", "torch (>=2.0.0)", "transformers (>=4.42.0)", "typing-extensions (>=4.6.1)", "zstandard"] +tests-numpy2 = ["Pillow (>=9.4.0)", "absl-py", "decorator", "elasticsearch (<8.0.0)", "jax (>=0.3.14) ; sys_platform != \"win32\"", "jaxlib (>=0.3.14) ; sys_platform != \"win32\"", "joblib (<1.3.0)", "joblibspark", "librosa", "lz4", "moto[server]", "polars[timezone] (>=0.20.0)", "protobuf (<4.0.0)", "py7zr", "pyspark (>=3.4)", "pytest", "pytest-datadir", "pytest-xdist", "rarfile (>=4.0)", "s3fs (>=2021.11.1)", "soundfile (>=0.12.1)", "soxr (>=0.4.0) ; python_version >= \"3.9\"", "sqlalchemy", "tiktoken", "torch (>=2.0.0)", "typing-extensions (>=4.6.1)", "zstandard"] torch = ["torch"] vision = ["Pillow (>=9.4.0)"] @@ -856,6 +896,7 @@ version = "1.8.7" description = "An implementation of the Debug Adapter Protocol for Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "debugpy-1.8.7-cp310-cp310-macosx_14_0_x86_64.whl", hash = "sha256:95fe04a573b8b22896c404365e03f4eda0ce0ba135b7667a1e57bd079793b96b"}, {file = "debugpy-1.8.7-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:628a11f4b295ffb4141d8242a9bb52b77ad4a63a2ad19217a93be0f77f2c28c9"}, @@ -891,6 +932,7 @@ version = "5.1.1" description = "Decorators for Humans" optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "decorator-5.1.1-py3-none-any.whl", hash = "sha256:b8c3f85900b9dc423225913c5aace94729fe1fa9763b38939a95226f02d37186"}, {file = 
"decorator-5.1.1.tar.gz", hash = "sha256:637996211036b6385ef91435e4fae22989472f9d571faba8927ba8253acbc330"}, @@ -902,6 +944,7 @@ version = "2.1.1" description = "Opinionated mocking library for Python" optional = false python-versions = ">=3.7,<4.0" +groups = ["dev"] files = [ {file = "decoy-2.1.1-py3-none-any.whl", hash = "sha256:7ddcc08b8ce991f7705cee76fae9061dcb17352e0a1ca2d9a0d4a0306ebd51cd"}, {file = "decoy-2.1.1.tar.gz", hash = "sha256:575bdbe81afb4c152cd99a34568a9aa4369461f79d6172c678279c5d5585befe"}, @@ -913,6 +956,7 @@ version = "0.7.1" description = "XML bomb protection for Python stdlib modules" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["dev"] files = [ {file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"}, {file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"}, @@ -924,6 +968,8 @@ version = "0.3.8" description = "serialize all of Python" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "dill-0.3.8-py3-none-any.whl", hash = "sha256:c36ca9ffb54365bdd2f8eb3eff7d2a21237f8452b57ace88b1ac615b7e815bd7"}, {file = "dill-0.3.8.tar.gz", hash = "sha256:3ebe3c479ad625c4553aca177444d89b486b1d84982eeacded644afc0cf797ca"}, @@ -939,6 +985,8 @@ version = "3.2.6" description = "The dynamic configurator for your Python Project" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "dynaconf-3.2.6-py2.py3-none-any.whl", hash = "sha256:3911c740d717df4576ed55f616c7cbad6e06bc8ef23ffca444b6e2a12fb1c34c"}, {file = "dynaconf-3.2.6.tar.gz", hash = "sha256:74cc1897396380bb957730eb341cc0976ee9c38bbcb53d3307c50caed0aedfb8"}, @@ -977,6 +1025,8 @@ version = "1.2.2" description = "Backport of PEP 654 (exception groups)" optional = false python-versions = ">=3.7" +groups 
= ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "exceptiongroup-1.2.2-py3-none-any.whl", hash = "sha256:3111b9d131c238bec2f8f516e123e14ba243563fb135d3fe885990585aa7795b"}, {file = "exceptiongroup-1.2.2.tar.gz", hash = "sha256:47c2edf7c6738fafb49fd34290706d1a1a2f4d1c6df275526b62cbb4aa5393cc"}, @@ -991,13 +1041,14 @@ version = "2.1.0" description = "Get the currently executing AST node of a frame, and other information" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "executing-2.1.0-py2.py3-none-any.whl", hash = "sha256:8d63781349375b5ebccc3142f4b30350c0cd9c79f921cde38be2be4637e98eaf"}, {file = "executing-2.1.0.tar.gz", hash = "sha256:8ea27ddd260da8150fa5a708269c4a10e76161e2496ec3e587da9e3c0fe4b9ab"}, ] [package.extras] -tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"] +tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich ; python_version >= \"3.11\""] [[package]] name = "fastjsonschema" @@ -1005,6 +1056,7 @@ version = "2.20.0" description = "Fastest Python implementation of JSON schema" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "fastjsonschema-2.20.0-py3-none-any.whl", hash = "sha256:5875f0b0fa7a0043a91e93a9b8f793bcbbba9691e7fd83dca95c28ba26d21f0a"}, {file = "fastjsonschema-2.20.0.tar.gz", hash = "sha256:3d48fc5300ee96f5d116f10fe6f28d938e6008f59a6a025c2649475b87f76a23"}, @@ -1019,15 +1071,17 @@ version = "3.16.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" +groups = ["main", "gpu"] files = [ {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.extras] docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] -typing = ["typing-extensions (>=4.12.2)"] +typing = ["typing-extensions (>=4.12.2) ; python_version < \"3.11\""] [[package]] name = "flake8" @@ -1035,6 +1089,7 @@ version = "7.1.1" description = "the modular source code checker: pep8 pyflakes and co" optional = false python-versions = ">=3.8.1" +groups = ["dev"] files = [ {file = "flake8-7.1.1-py2.py3-none-any.whl", hash = "sha256:597477df7860daa5aa0fdd84bf5208a043ab96b8e96ab708770ae0364dd03213"}, {file = "flake8-7.1.1.tar.gz", hash = "sha256:049d058491e228e03e67b390f311bbf88fce2dbaa8fa673e7aea87b7198b8d38"}, @@ -1051,6 +1106,7 @@ version = "1.5.1" description = "Validates fully-qualified domain names against RFC 1123, so that they are acceptable to modern bowsers" optional = false python-versions = ">=2.7, !=3.0, !=3.1, !=3.2, !=3.3, !=3.4, <4" +groups = ["dev"] files = [ {file = "fqdn-1.5.1-py3-none-any.whl", hash = "sha256:3a179af3761e4df6eb2e026ff9e1a3033d3587bf980a0b1b2e1e5d08d7358014"}, {file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"}, @@ -1062,6 +1118,8 @@ version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" optional = false python-versions = 
">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:d1b3eb7b05ea246510b43a7e53ed1653e55c2121019a97e60cad7efb881a97bb"}, @@ -1163,10 +1221,12 @@ version = "2024.6.1" description = "File-system specification" optional = false python-versions = ">=3.8" +groups = ["main", "gpu"] files = [ {file = "fsspec-2024.6.1-py3-none-any.whl", hash = "sha256:3cb443f8bcd2efb31295a5b9fdb02aee81d8452c80d28f97a6d0959e6cee101e"}, {file = "fsspec-2024.6.1.tar.gz", hash = "sha256:fad7d7e209dd4c1208e3bbfda706620e0da5142bebbd9c384afb95b07e798e49"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.dependencies] aiohttp = {version = "<4.0.0a0 || >4.0.0a0,<4.0.0a1 || >4.0.0a1", optional = true, markers = "extra == \"http\""} @@ -1205,6 +1265,8 @@ version = "2.1.3" description = "URL manipulation made simple." optional = false python-versions = "*" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "furl-2.1.3-py2.py3-none-any.whl", hash = "sha256:9ab425062c4217f9802508e45feb4a83e54324273ac4b202f1850363309666c0"}, {file = "furl-2.1.3.tar.gz", hash = "sha256:5a6188fe2666c484a12159c18be97a1977a71d632ef5bb867ef15f54af39cc4e"}, @@ -1220,6 +1282,7 @@ version = "0.14.0" description = "A pure-Python, bring-your-own-I/O implementation of HTTP/1.1" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "h11-0.14.0-py3-none-any.whl", hash = "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761"}, {file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"}, @@ -1231,6 +1294,7 @@ version = "1.0.6" description = "A minimal low-level HTTP client." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "httpcore-1.0.6-py3-none-any.whl", hash = "sha256:27b59625743b85577a8c0e10e55b50b5368a4f2cfe8cc7bcfa9cf00829c2682f"}, {file = "httpcore-1.0.6.tar.gz", hash = "sha256:73f6dbd6eb8c21bbf7ef8efad555481853f5f6acdeaff1edb0694289269ee17f"}, @@ -1252,6 +1316,7 @@ version = "0.27.2" description = "The next generation HTTP client." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "httpx-0.27.2-py3-none-any.whl", hash = "sha256:7bb2708e112d8fdd7829cd4243970f0c223274051cb35ee80c03301ee29a3df0"}, {file = "httpx-0.27.2.tar.gz", hash = "sha256:f7c2be1d2f3c3c3160d441802406b206c2b76f5947b11115e6df10c6c65e66c2"}, @@ -1265,7 +1330,7 @@ idna = "*" sniffio = "*" [package.extras] -brotli = ["brotli", "brotlicffi"] +brotli = ["brotli ; platform_python_implementation == \"CPython\"", "brotlicffi ; platform_python_implementation != \"CPython\""] cli = ["click (==8.*)", "pygments (==2.*)", "rich (>=10,<14)"] http2 = ["h2 (>=3,<5)"] socks = ["socksio (==1.*)"] @@ -1277,10 +1342,12 @@ version = "0.26.2" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = false python-versions = ">=3.8.0" +groups = ["main", "gpu"] files = [ {file = "huggingface_hub-0.26.2-py3-none-any.whl", hash = "sha256:98c2a5a8e786c7b2cb6fdeb2740893cba4d53e312572ed3d8afafda65b128c46"}, {file = "huggingface_hub-0.26.2.tar.gz", hash = "sha256:b100d853465d965733964d123939ba287da60a547087783ddff8a323f340332b"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.dependencies] filelock = "*" @@ -1311,10 +1378,12 @@ version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" +groups = ["main", "dev", "gpu"] files = [ {file = "idna-3.10-py3-none-any.whl", hash = 
"sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] +markers = {main = "extra == \"huggingface\" or extra == \"jobs\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.extras] all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] @@ -1325,6 +1394,8 @@ version = "8.5.0" description = "Read metadata from Python packages" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version == \"3.9\"" files = [ {file = "importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, {file = "importlib_metadata-8.5.0.tar.gz", hash = "sha256:71522656f0abace1d072b9e5481a48f07c138e00f079c38c8f883823f9c26bd7"}, @@ -1334,12 +1405,12 @@ files = [ zipp = ">=3.20" [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] perf = ["ipython"] -test = ["flufl.flake8", "importlib-resources (>=1.3)", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] +test = ["flufl.flake8", "importlib-resources (>=1.3) ; python_version < \"3.9\"", "jaraco.test (>=5.4)", "packaging", "pyfakefs", "pytest (>=6,!=8.1.*)", "pytest-perf (>=0.9.2)"] type = ["pytest-mypy"] [[package]] @@ -1348,6 +1419,7 @@ version = "2.0.0" description = "brain-dead simple config-ini parsing" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "iniconfig-2.0.0-py3-none-any.whl", hash = "sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374"}, {file = 
"iniconfig-2.0.0.tar.gz", hash = "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3"}, @@ -1359,6 +1431,7 @@ version = "6.29.5" description = "IPython Kernel for Jupyter" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "ipykernel-6.29.5-py3-none-any.whl", hash = "sha256:afdb66ba5aa354b09b91379bac28ae4afebbb30e8b39510c9690afb7a10421b5"}, {file = "ipykernel-6.29.5.tar.gz", hash = "sha256:f093a22c4a40f8828f8e330a9c297cb93dcab13bd9678ded6de8e5cf81c56215"}, @@ -1392,6 +1465,7 @@ version = "8.18.1" description = "IPython: Productive Interactive Computing" optional = false python-versions = ">=3.9" +groups = ["dev"] files = [ {file = "ipython-8.18.1-py3-none-any.whl", hash = "sha256:e8267419d72d81955ec1177f8a29aaa90ac80ad647499201119e2f05e99aa397"}, {file = "ipython-8.18.1.tar.gz", hash = "sha256:ca6f079bb33457c66e233e4580ebfc4128855b4cf6370dddd73842a9563e8a27"}, @@ -1429,6 +1503,7 @@ version = "8.1.5" description = "Jupyter interactive widgets" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "ipywidgets-8.1.5-py3-none-any.whl", hash = "sha256:3290f526f87ae6e77655555baba4f36681c555b8bdbbff430b70e52c34c86245"}, {file = "ipywidgets-8.1.5.tar.gz", hash = "sha256:870e43b1a35656a80c18c9503bbf2d16802db1cb487eec6fab27d683381dde17"}, @@ -1450,6 +1525,7 @@ version = "20.11.0" description = "Operations with ISO 8601 durations" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "isoduration-20.11.0-py3-none-any.whl", hash = "sha256:b2904c2a4228c3d44f409c8ae8e2370eb21a26f7ac2ec5446df141dde3452042"}, {file = "isoduration-20.11.0.tar.gz", hash = "sha256:ac2f9015137935279eac671f94f89eb00584f940f5dc49462a0c4ee692ba1bd9"}, @@ -1464,6 +1540,7 @@ version = "5.13.2" description = "A Python utility / library to sort Python imports." 
optional = false python-versions = ">=3.8.0" +groups = ["dev"] files = [ {file = "isort-5.13.2-py3-none-any.whl", hash = "sha256:8ca5e72a8d85860d5a3fa69b8745237f2939afe12dbf656afbcb47fe72d947a6"}, {file = "isort-5.13.2.tar.gz", hash = "sha256:48fdfcb9face5d58a4f6dde2e72a1fb8dcaf8ab26f95ab49fab84c2ddefb0109"}, @@ -1478,6 +1555,7 @@ version = "0.19.1" description = "An autocompletion tool for Python that can be used for text editors." optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "jedi-0.19.1-py2.py3-none-any.whl", hash = "sha256:e983c654fe5c02867aef4cdfce5a2fbb4a50adc0af145f70504238f18ef5e7e0"}, {file = "jedi-0.19.1.tar.gz", hash = "sha256:cf0496f3651bc65d7174ac1b7d043eff454892c708a87d1b683e57b569927ffd"}, @@ -1497,10 +1575,12 @@ version = "3.1.4" description = "A very fast and expressive template engine." optional = false python-versions = ">=3.7" +groups = ["dev", "gpu"] files = [ {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, {file = "jinja2-3.1.4.tar.gz", hash = "sha256:4a3aee7acbbe7303aede8e9648d13b8bf88a429282aa6122a993f0ac800cb369"}, ] +markers = {gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.dependencies] MarkupSafe = ">=2.0" @@ -1514,6 +1594,7 @@ version = "1.0.1" description = "JSON Matching Expressions" optional = false python-versions = ">=3.7" +groups = ["main"] files = [ {file = "jmespath-1.0.1-py3-none-any.whl", hash = "sha256:02e2e4cc71b5bcab88332eebf907519190dd9e6e82107fa7f83b1003a6252980"}, {file = "jmespath-1.0.1.tar.gz", hash = "sha256:90261b206d6defd58fdd5e85f478bf633a2901798906be2ad389150c5c60edbe"}, @@ -1525,6 +1606,8 @@ version = "1.4.2" description = "Lightweight pipelining with Python functions" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "joblib-1.4.2-py3-none-any.whl", hash = 
"sha256:06d478d5674cbc267e7496a410ee875abd68e4340feff4490bcb7afb88060ae6"}, {file = "joblib-1.4.2.tar.gz", hash = "sha256:2382c5816b2636fbd20a09e0f4e9dad4736765fdfb7dca582943b9c1366b3f0e"}, @@ -1536,6 +1619,8 @@ version = "1.5.2" description = "Streaming JSON encoder and decoder" optional = false python-versions = "<4,>=3.5" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "json-stream-1.5.2.tar.gz", hash = "sha256:e6f895d48190b539c431e3d8623ed868bee6d0005d5b213be6ee26256ef20ebc"}, {file = "json_stream-1.5.2-py3-none-any.whl", hash = "sha256:e0363e887770e879f438c151c56f2d12fda674e92bbf1b5c184d84723deee631"}, @@ -1550,6 +1635,7 @@ version = "0.9.25" description = "A Python implementation of the JSON5 data format." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "json5-0.9.25-py3-none-any.whl", hash = "sha256:34ed7d834b1341a86987ed52f3f76cd8ee184394906b6e22a1e0deb9ab294e8f"}, {file = "json5-0.9.25.tar.gz", hash = "sha256:548e41b9be043f9426776f05df8635a00fe06104ea51ed24b67f908856e151ae"}, @@ -1561,6 +1647,7 @@ version = "3.0.0" description = "Identify specific nodes in a JSON document (RFC 6901)" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "jsonpointer-3.0.0-py2.py3-none-any.whl", hash = "sha256:13e088adc14fca8b6aa8177c044e12701e6ad4b28ff10e65f2267a90109c9942"}, {file = "jsonpointer-3.0.0.tar.gz", hash = "sha256:2b2d729f2091522d61c3b31f82e11870f60b68f43fbc705cb76bf4b832af59ef"}, @@ -1572,10 +1659,12 @@ version = "4.23.0" description = "An implementation of JSON Schema validation for Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, {file = "jsonschema-4.23.0.tar.gz", hash = "sha256:d71497fef26351a33265337fa77ffeb82423f3ea21283cd9467bb03999266bc4"}, ] +markers = {main = "extra == \"jobs\""} [package.dependencies] attrs = 
">=22.2.0" @@ -1601,10 +1690,12 @@ version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, {file = "jsonschema_specifications-2024.10.1.tar.gz", hash = "sha256:0f38b83639958ce1152d02a7f062902c41c8fd20d558b0c34344292d417ae272"}, ] +markers = {main = "extra == \"jobs\""} [package.dependencies] referencing = ">=0.31.0" @@ -1615,6 +1706,7 @@ version = "1.1.1" description = "Jupyter metapackage. Install all the Jupyter components in one go." optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "jupyter-1.1.1-py2.py3-none-any.whl", hash = "sha256:7a59533c22af65439b24bbe60373a4e95af8f16ac65a6c00820ad378e3f7cc83"}, {file = "jupyter-1.1.1.tar.gz", hash = "sha256:d55467bceabdea49d7e3624af7e33d59c37fff53ed3a350e1ac957bed731de7a"}, @@ -1634,6 +1726,7 @@ version = "8.6.3" description = "Jupyter protocol implementation and client libraries" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_client-8.6.3-py3-none-any.whl", hash = "sha256:e8a19cc986cc45905ac3362915f410f3af85424b4c0905e94fa5f2cb08e8f23f"}, {file = "jupyter_client-8.6.3.tar.gz", hash = "sha256:35b3a0947c4a6e9d589eb97d7d4cd5e90f910ee73101611f01283732bd6d9419"}, @@ -1649,7 +1742,7 @@ traitlets = ">=5.3" [package.extras] docs = ["ipykernel", "myst-parser", "pydata-sphinx-theme", "sphinx (>=4)", "sphinx-autodoc-typehints", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko", "pre-commit", "pytest (<8.2.0)", "pytest-cov", "pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] +test = ["coverage", "ipykernel (>=6.14)", "mypy", "paramiko ; sys_platform == \"win32\"", "pre-commit", "pytest (<8.2.0)", "pytest-cov", 
"pytest-jupyter[client] (>=0.4.1)", "pytest-timeout"] [[package]] name = "jupyter-console" @@ -1657,6 +1750,7 @@ version = "6.6.3" description = "Jupyter terminal console" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "jupyter_console-6.6.3-py3-none-any.whl", hash = "sha256:309d33409fcc92ffdad25f0bcdf9a4a9daa61b6f341177570fdac03de5352485"}, {file = "jupyter_console-6.6.3.tar.gz", hash = "sha256:566a4bf31c87adbfadf22cdf846e3069b59a71ed5da71d6ba4d8aaad14a53539"}, @@ -1681,6 +1775,7 @@ version = "5.7.2" description = "Jupyter core package. A base package on which Jupyter projects rely." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_core-5.7.2-py3-none-any.whl", hash = "sha256:4f7315d2f6b4bcf2e3e7cb6e46772eba760ae459cd1f59d29eb57b0a01bd7409"}, {file = "jupyter_core-5.7.2.tar.gz", hash = "sha256:aa5f8d32bbf6b431ac830496da7392035d6f61b4f54872f15c4bd2a9c3f536d9"}, @@ -1701,6 +1796,7 @@ version = "0.10.0" description = "Jupyter Event System library" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_events-0.10.0-py3-none-any.whl", hash = "sha256:4b72130875e59d57716d327ea70d3ebc3af1944d3717e5a498b8a06c6c159960"}, {file = "jupyter_events-0.10.0.tar.gz", hash = "sha256:670b8229d3cc882ec782144ed22e0d29e1c2d639263f92ca8383e66682845e22"}, @@ -1726,6 +1822,7 @@ version = "2.2.5" description = "Multi-Language Server WebSocket proxy for Jupyter Notebook/Lab server" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter-lsp-2.2.5.tar.gz", hash = "sha256:793147a05ad446f809fd53ef1cd19a9f5256fd0a2d6b7ce943a982cb4f545001"}, {file = "jupyter_lsp-2.2.5-py3-none-any.whl", hash = "sha256:45fbddbd505f3fbfb0b6cb2f1bc5e15e83ab7c79cd6e89416b248cb3c00c11da"}, @@ -1741,6 +1838,7 @@ version = "2.14.2" description = "The backend—i.e. core services, APIs, and REST endpoints—to Jupyter web applications." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_server-2.14.2-py3-none-any.whl", hash = "sha256:47ff506127c2f7851a17bf4713434208fc490955d0e8632e95014a9a9afbeefd"}, {file = "jupyter_server-2.14.2.tar.gz", hash = "sha256:66095021aa9638ced276c248b1d81862e4c50f292d575920bbe960de1c56b12b"}, @@ -1777,6 +1875,7 @@ version = "0.5.3" description = "A Jupyter Server Extension Providing Terminals." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyter_server_terminals-0.5.3-py3-none-any.whl", hash = "sha256:41ee0d7dc0ebf2809c668e0fc726dfaf258fcd3e769568996ca731b6194ae9aa"}, {file = "jupyter_server_terminals-0.5.3.tar.gz", hash = "sha256:5ae0295167220e9ace0edcfdb212afd2b01ee8d179fe6f23c899590e9b8a5269"}, @@ -1796,6 +1895,7 @@ version = "4.2.5" description = "JupyterLab computational environment" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyterlab-4.2.5-py3-none-any.whl", hash = "sha256:73b6e0775d41a9fee7ee756c80f58a6bed4040869ccc21411dc559818874d321"}, {file = "jupyterlab-4.2.5.tar.gz", hash = "sha256:ae7f3a1b8cb88b4f55009ce79fa7c06f99d70cd63601ee4aa91815d054f46f75"}, @@ -1831,6 +1931,7 @@ version = "0.3.0" description = "Pygments theme using JupyterLab CSS variables" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyterlab_pygments-0.3.0-py3-none-any.whl", hash = "sha256:841a89020971da1d8693f1a99997aefc5dc424bb1b251fd6322462a1b8842780"}, {file = "jupyterlab_pygments-0.3.0.tar.gz", hash = "sha256:721aca4d9029252b11cfa9d185e5b5af4d54772bb8072f9b7036f4170054d35d"}, @@ -1842,6 +1943,7 @@ version = "2.27.3" description = "A set of server components for JupyterLab and JupyterLab like applications." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "jupyterlab_server-2.27.3-py3-none-any.whl", hash = "sha256:e697488f66c3db49df675158a77b3b017520d772c6e1548c7d9bcc5df7944ee4"}, {file = "jupyterlab_server-2.27.3.tar.gz", hash = "sha256:eb36caca59e74471988f0ae25c77945610b887f777255aa21f8065def9e51ed4"}, @@ -1868,6 +1970,7 @@ version = "3.0.13" description = "Jupyter interactive widgets for JupyterLab" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "jupyterlab_widgets-3.0.13-py3-none-any.whl", hash = "sha256:e3cda2c233ce144192f1e29914ad522b2f4c40e77214b0cc97377ca3d323db54"}, {file = "jupyterlab_widgets-3.0.13.tar.gz", hash = "sha256:a2966d385328c1942b683a8cd96b89b8dd82c8b8f81dda902bb2bc06d46f5bed"}, @@ -1879,6 +1982,7 @@ version = "3.0.2" description = "Safely add untrusted strings to HTML/XML markup." optional = false python-versions = ">=3.9" +groups = ["dev", "gpu"] files = [ {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:7e94c425039cde14257288fd61dcfb01963e658efbc0ff54f5306b06054700f8"}, {file = "MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9e2d922824181480953426608b81967de705c3cef4d1af983af849d7bd619158"}, @@ -1942,6 +2046,7 @@ files = [ {file = "MarkupSafe-3.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:6e296a513ca3d94054c2c881cc913116e90fd030ad1c656b3869762b754f5f8a"}, {file = "markupsafe-3.0.2.tar.gz", hash = "sha256:ee55d3edf80167e48ea11a923c7386f4669df67d7994554387f84e7d8b0a2bf0"}, ] +markers = {gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [[package]] name = "matplotlib-inline" @@ -1949,6 +2054,7 @@ version = "0.1.7" description = "Inline Matplotlib backend for Jupyter" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "matplotlib_inline-0.1.7-py3-none-any.whl", hash = "sha256:df192d39a4ff8f21b1895d72e6a13f5fcc5099f00fa84384e0ea28c2cc0653ca"}, {file = "matplotlib_inline-0.1.7.tar.gz", hash = 
"sha256:8423b23ec666be3d16e16b60bdd8ac4e86e840ebd1dd11a30b9f117f2fa0ab90"}, @@ -1963,6 +2069,7 @@ version = "0.7.0" description = "McCabe checker, plugin for flake8" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e"}, {file = "mccabe-0.7.0.tar.gz", hash = "sha256:348e0240c33b60bbdf4e523192ef919f28cb2c3d7d5c7794f74009290f236325"}, @@ -1974,6 +2081,7 @@ version = "3.0.2" description = "A sane and fast Markdown parser with useful plugins and renderers" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "mistune-3.0.2-py3-none-any.whl", hash = "sha256:71481854c30fdbc938963d3605b72501f5c10a9320ecd412c121c163a1c7d205"}, {file = "mistune-3.0.2.tar.gz", hash = "sha256:fc7f93ded930c92394ef2cb6f04a8aabab4117a91449e72dcc8dfa646a508be8"}, @@ -1985,6 +2093,8 @@ version = "1.3.0" description = "Python library for arbitrary-precision floating-point arithmetic" optional = false python-versions = "*" +groups = ["gpu"] +markers = "sys_platform == \"win32\" or sys_platform == \"linux\"" files = [ {file = "mpmath-1.3.0-py3-none-any.whl", hash = "sha256:a0b2b9fe80bbcd81a6647ff13108738cfb482d481d826cc0e02f5b35e5c88d2c"}, {file = "mpmath-1.3.0.tar.gz", hash = "sha256:7a28eb2a9774d00c7bc92411c19a89209d5da7c4c9a9e227be8330a23a25b91f"}, @@ -1993,7 +2103,7 @@ files = [ [package.extras] develop = ["codecov", "pycodestyle", "pytest (>=4.6)", "pytest-cov", "wheel"] docs = ["sphinx"] -gmpy = ["gmpy2 (>=2.1.0a4)"] +gmpy = ["gmpy2 (>=2.1.0a4) ; platform_python_implementation != \"PyPy\""] tests = ["pytest (>=4.6)"] [[package]] @@ -2002,6 +2112,8 @@ version = "6.1.0" description = "multidict implementation" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:99f826cbf970077383d7de805c0681799491cb939c25450b9b5b3ced03ca99f1"}, @@ -2106,6 +2218,8 @@ version = "0.70.16" description = "better multiprocessing and multithreading in Python" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "multiprocess-0.70.16-pp310-pypy310_pp73-macosx_10_13_x86_64.whl", hash = "sha256:476887be10e2f59ff183c006af746cb6f1fd0eadcfd4ef49e605cbe2659920ee"}, {file = "multiprocess-0.70.16-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:d951bed82c8f73929ac82c61f01a7b5ce8f3e5ef40f5b52553b4f547ce2b08ec"}, @@ -2130,6 +2244,7 @@ version = "1.0.0" description = "Type system extensions for programs checked with the mypy type checker." optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"}, {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"}, @@ -2141,6 +2256,7 @@ version = "0.10.0" description = "A client library for executing notebooks. Formerly nbconvert's ExecutePreprocessor." optional = false python-versions = ">=3.8.0" +groups = ["dev"] files = [ {file = "nbclient-0.10.0-py3-none-any.whl", hash = "sha256:f13e3529332a1f1f81d82a53210322476a168bb7090a0289c795fe9cc11c9d3f"}, {file = "nbclient-0.10.0.tar.gz", hash = "sha256:4b3f1b7dba531e498449c4db4f53da339c91d449dc11e9af3a43b4eb5c5abb09"}, @@ -2163,6 +2279,7 @@ version = "7.16.4" description = "Converting Jupyter Notebooks (.ipynb files) to other formats. Output formats include asciidoc, html, latex, markdown, pdf, py, rst, script. 
nbconvert can be used both as a Python library (`import nbconvert`) or as a command line tool (invoked as `jupyter nbconvert ...`)." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "nbconvert-7.16.4-py3-none-any.whl", hash = "sha256:05873c620fe520b6322bf8a5ad562692343fe3452abda5765c7a34b7d1aa3eb3"}, {file = "nbconvert-7.16.4.tar.gz", hash = "sha256:86ca91ba266b0a448dc96fa6c5b9d98affabde2867b363258703536807f9f7f4"}, @@ -2201,6 +2318,7 @@ version = "5.10.4" description = "The Jupyter Notebook format" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "nbformat-5.10.4-py3-none-any.whl", hash = "sha256:3b48d6c8fbca4b299bf3982ea7db1af21580e4fec269ad087b9e81588891200b"}, {file = "nbformat-5.10.4.tar.gz", hash = "sha256:322168b14f937a5d11362988ecac2a4952d3d8e3a2cbeb2319584631226d5b3a"}, @@ -2222,6 +2340,7 @@ version = "1.6.0" description = "Patch asyncio to allow nested event loops" optional = false python-versions = ">=3.5" +groups = ["dev"] files = [ {file = "nest_asyncio-1.6.0-py3-none-any.whl", hash = "sha256:87af6efd6b5e897c81050477ef65c62e2b2f35d51703cae01aff2905b1852e1c"}, {file = "nest_asyncio-1.6.0.tar.gz", hash = "sha256:6f172d5449aca15afd6c646851f4e31e02c598d553a667e38cafa997cfec55fe"}, @@ -2233,6 +2352,7 @@ version = "3.2.1" description = "Python package for creating and manipulating graphs and networks" optional = false python-versions = ">=3.9" +groups = ["main", "gpu"] files = [ {file = "networkx-3.2.1-py3-none-any.whl", hash = "sha256:f18c69adc97877c42332c170849c96cefa91881c99a7cb3e95b7c659ebdc1ec2"}, {file = "networkx-3.2.1.tar.gz", hash = "sha256:9f1bb5cf3409bf324e0a722c20bdb4c20ee39bf1c30ce8ae499c8502b0b5e0c6"}, @@ -2251,6 +2371,7 @@ version = "1.9.1" description = "Node.js virtual environment builder" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,>=2.7" +groups = ["dev"] files = [ {file = "nodeenv-1.9.1-py2.py3-none-any.whl", hash = 
"sha256:ba11c9782d29c27c70ffbdda2d7415098754709be8a7056d79a737cd901155c9"}, {file = "nodeenv-1.9.1.tar.gz", hash = "sha256:6ec12890a2dab7946721edbfbcd91f3319c6ccc9aec47be7c7e6b7011ee6645f"}, @@ -2262,6 +2383,7 @@ version = "22.13.1" description = "unoffical Node.js package" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "nodejs_wheel_binaries-22.13.1-py2.py3-none-macosx_11_0_arm64.whl", hash = "sha256:e4f64d0e26600d51cbdd98a6718a19c2d1b8c7538e9e353e95a634a06a8e1a58"}, {file = "nodejs_wheel_binaries-22.13.1-py2.py3-none-macosx_11_0_x86_64.whl", hash = "sha256:afcb40484bb02f23137f838014724604ae183fd767b30da95b0be1510a40c06d"}, @@ -2280,6 +2402,7 @@ version = "7.2.2" description = "Jupyter Notebook - A web-based notebook environment for interactive computing" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "notebook-7.2.2-py3-none-any.whl", hash = "sha256:c89264081f671bc02eec0ed470a627ed791b9156cad9285226b31611d3e9fe1c"}, {file = "notebook-7.2.2.tar.gz", hash = "sha256:2ef07d4220421623ad3fe88118d687bc0450055570cdd160814a59cf3a1c516e"}, @@ -2295,7 +2418,7 @@ tornado = ">=6.2.0" [package.extras] dev = ["hatch", "pre-commit"] docs = ["myst-parser", "nbsphinx", "pydata-sphinx-theme", "sphinx (>=1.3.6)", "sphinxcontrib-github-alt", "sphinxcontrib-spelling"] -test = ["importlib-resources (>=5.0)", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] +test = ["importlib-resources (>=5.0) ; python_version < \"3.10\"", "ipykernel", "jupyter-server[test] (>=2.4.0,<3)", "jupyterlab-server[test] (>=2.27.1,<3)", "nbval", "pytest (>=7.0)", "pytest-console-scripts", "pytest-timeout", "pytest-tornasync", "requests"] [[package]] name = "notebook-shim" @@ -2303,6 +2426,7 @@ version = "0.2.4" description = "A shim layer for notebook traits and config" optional = false python-versions = 
">=3.7" +groups = ["dev"] files = [ {file = "notebook_shim-0.2.4-py3-none-any.whl", hash = "sha256:411a5be4e9dc882a074ccbcae671eda64cceb068767e9a3419096986560e1cef"}, {file = "notebook_shim-0.2.4.tar.gz", hash = "sha256:b4b2cfa1b65d98307ca24361f5b30fe785b53c3fd07b7a47e89acb5e6ac638cb"}, @@ -2320,6 +2444,7 @@ version = "1.26.4" description = "Fundamental package for array computing in Python" optional = false python-versions = ">=3.9" +groups = ["main", "dev", "gpu"] files = [ {file = "numpy-1.26.4-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:9ff0f4f29c51e2803569d7a51c2304de5554655a60c5d776e35b4a41413830d0"}, {file = "numpy-1.26.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:2e4ee3380d6de9c9ec04745830fd9e2eccb3e6cf790d39d7b98ffd19b0dd754a"}, @@ -2365,6 +2490,8 @@ version = "12.1.3.1" description = "CUBLAS native runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl", hash = "sha256:ee53ccca76a6fc08fb9701aa95b6ceb242cdaab118c3bb152af4e579af792728"}, {file = "nvidia_cublas_cu12-12.1.3.1-py3-none-win_amd64.whl", hash = "sha256:2b964d60e8cf11b5e1073d179d85fa340c120e99b3067558f3cf98dd69d02906"}, @@ -2376,6 +2503,8 @@ version = "12.1.105" description = "CUDA profiling tools runtime libs." 
optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:e54fde3983165c624cb79254ae9818a456eb6e87a7fd4d56a2352c24ee542d7e"}, {file = "nvidia_cuda_cupti_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:bea8236d13a0ac7190bd2919c3e8e6ce1e402104276e6f9694479e48bb0eb2a4"}, @@ -2387,6 +2516,8 @@ version = "12.1.105" description = "NVRTC native runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:339b385f50c309763ca65456ec75e17bbefcbbf2893f462cb8b90584cd27a1c2"}, {file = "nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:0a98a522d9ff138b96c010a65e145dc1b4850e9ecb75a0172371793752fd46ed"}, @@ -2398,6 +2529,8 @@ version = "12.1.105" description = "CUDA Runtime native Libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:6e258468ddf5796e25f1dc591a31029fa317d97a0a94ed93468fc86301d61e40"}, {file = "nvidia_cuda_runtime_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:dfb46ef84d73fababab44cf03e3b83f80700d27ca300e537f85f636fac474344"}, @@ -2409,6 +2542,8 @@ version = "9.1.0.70" description = "cuDNN runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == 
\"linux\")" files = [ {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl", hash = "sha256:165764f44ef8c61fcdfdfdbe769d687e06374059fbb388b6c89ecb0e28793a6f"}, {file = "nvidia_cudnn_cu12-9.1.0.70-py3-none-win_amd64.whl", hash = "sha256:6278562929433d68365a07a4a1546c237ba2849852c0d4b2262a486e805b977a"}, @@ -2423,6 +2558,8 @@ version = "11.0.2.54" description = "CUFFT native runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-manylinux1_x86_64.whl", hash = "sha256:794e3948a1aa71fd817c3775866943936774d1c14e7628c74f6f7417224cdf56"}, {file = "nvidia_cufft_cu12-11.0.2.54-py3-none-win_amd64.whl", hash = "sha256:d9ac353f78ff89951da4af698f80870b1534ed69993f10a4cf1d96f21357e253"}, @@ -2434,6 +2571,8 @@ version = "10.3.2.106" description = "CURAND native runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_curand_cu12-10.3.2.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:9d264c5036dde4e64f1de8c50ae753237c12e0b1348738169cd0f8a536c0e1e0"}, {file = "nvidia_curand_cu12-10.3.2.106-py3-none-win_amd64.whl", hash = "sha256:75b6b0c574c0037839121317e17fd01f8a69fd2ef8e25853d826fec30bdba74a"}, @@ -2445,6 +2584,8 @@ version = "11.4.5.107" description = "CUDA solver native runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cusolver_cu12-11.4.5.107-py3-none-manylinux1_x86_64.whl", hash = "sha256:8a7ec542f0412294b15072fa7dab71d31334014a69f953004ea7a118206fe0dd"}, {file = 
"nvidia_cusolver_cu12-11.4.5.107-py3-none-win_amd64.whl", hash = "sha256:74e0c3a24c78612192a74fcd90dd117f1cf21dea4822e66d89e8ea80e3cd2da5"}, @@ -2461,6 +2602,8 @@ version = "12.1.0.106" description = "CUSPARSE native runtime libraries" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-manylinux1_x86_64.whl", hash = "sha256:f3b50f42cf363f86ab21f720998517a659a48131e8d538dc02f8768237bd884c"}, {file = "nvidia_cusparse_cu12-12.1.0.106-py3-none-win_amd64.whl", hash = "sha256:b798237e81b9719373e8fae8d4f091b70a0cf09d9d85c95a557e11df2d8e9a5a"}, @@ -2475,6 +2618,8 @@ version = "2.20.5" description = "NVIDIA Collective Communication Library (NCCL) Runtime" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01"}, {file = "nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56"}, @@ -2486,6 +2631,8 @@ version = "12.6.77" description = "Nvidia JIT LTO Library" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_aarch64.whl", hash = "sha256:3bf10d85bb1801e9c894c6e197e44dd137d2a0a9e43f8450e9ad13f2df0dd52d"}, {file = "nvidia_nvjitlink_cu12-12.6.77-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9ae346d16203ae4ea513be416495167a0101d33d2d14935aa9c1829a3fb45142"}, @@ -2498,6 +2645,8 @@ 
version = "12.1.105" description = "NVIDIA Tools Extension" optional = false python-versions = ">=3" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "nvidia_nvtx_cu12-12.1.105-py3-none-manylinux1_x86_64.whl", hash = "sha256:dc21cf308ca5691e7c04d962e213f8a4aa9bbfa23d95412f452254c2caeb09e5"}, {file = "nvidia_nvtx_cu12-12.1.105-py3-none-win_amd64.whl", hash = "sha256:65f4d98982b31b60026e0e6de73fbdfc09d08a96f4656dd3665ca616a11e1e82"}, @@ -2509,6 +2658,8 @@ version = "1.0.1" description = "Ordered Multivalue Dictionary" optional = false python-versions = "*" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "orderedmultidict-1.0.1-py2.py3-none-any.whl", hash = "sha256:43c839a17ee3cdd62234c47deca1a8508a3f2ca1d0678a3bf791c87cf84adbf3"}, {file = "orderedmultidict-1.0.1.tar.gz", hash = "sha256:04070bbb5e87291cc9bfa51df413677faf2141c73c61d2a5f7b26bea3cd882ad"}, @@ -2523,6 +2674,7 @@ version = "7.7.0" description = "A decorator to automatically detect mismatch when overriding a method." 
optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "overrides-7.7.0-py3-none-any.whl", hash = "sha256:c7ed9d062f78b8e4c1a7b70bd8796b35ead4d9f510227ef9c5dc7626c60d7e49"}, {file = "overrides-7.7.0.tar.gz", hash = "sha256:55158fa3d93b98cc75299b1e67078ad9003ca27945c76162c1c0766d6f91820a"}, @@ -2534,10 +2686,12 @@ version = "24.1" description = "Core utilities for Python packages" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "gpu"] files = [ {file = "packaging-24.1-py3-none-any.whl", hash = "sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124"}, {file = "packaging-24.1.tar.gz", hash = "sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [[package]] name = "pandas" @@ -2545,6 +2699,7 @@ version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = false python-versions = ">=3.9" +groups = ["main", "dev"] files = [ {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, @@ -2631,6 +2786,7 @@ version = "1.5.1" description = "Utilities for writing pandoc filters in python" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" +groups = ["dev"] files = [ {file = "pandocfilters-1.5.1-py2.py3-none-any.whl", hash = "sha256:93be382804a9cdb0a7267585f157e5d1731bbe5545a85b268d6f5fe6232de2bc"}, {file = "pandocfilters-1.5.1.tar.gz", hash = "sha256:002b4a555ee4ebc03f8b66307e287fa492e4a77b4ea14d3f934328297bb4939e"}, @@ -2642,6 +2798,7 @@ version = "0.8.4" description = "A Python Parser" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = 
"parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18"}, {file = "parso-0.8.4.tar.gz", hash = "sha256:eb3a7b58240fb99099a345571deecc0f9540ea5f4dd2fe14c2a99d6b281ab92d"}, @@ -2657,6 +2814,8 @@ version = "2.3.7.post1" description = "Object-oriented filesystem paths" optional = false python-versions = "*" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "pathlib2-2.3.7.post1-py2.py3-none-any.whl", hash = "sha256:5266a0fd000452f1b3467d782f079a4343c63aaa119221fbdc4e39577489ca5b"}, {file = "pathlib2-2.3.7.post1.tar.gz", hash = "sha256:9fe0edad898b83c0c3e199c842b27ed216645d2e177757b2dd67384d4113c641"}, @@ -2671,6 +2830,7 @@ version = "0.12.1" description = "Utility library for gitignore style pattern matching of file paths." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08"}, {file = "pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712"}, @@ -2682,6 +2842,7 @@ version = "0.14.1" description = "Check PEP-8 naming conventions, plugin for flake8" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pep8-naming-0.14.1.tar.gz", hash = "sha256:1ef228ae80875557eb6c1549deafed4dabbf3261cfcafa12f773fe0db9be8a36"}, {file = "pep8_naming-0.14.1-py3-none-any.whl", hash = "sha256:63f514fc777d715f935faf185dedd679ab99526a7f2f503abb61587877f7b1c5"}, @@ -2696,6 +2857,8 @@ version = "4.9.0" description = "Pexpect allows easy control of interactive console applications." 
optional = false python-versions = "*" +groups = ["dev"] +markers = "sys_platform != \"win32\"" files = [ {file = "pexpect-4.9.0-py2.py3-none-any.whl", hash = "sha256:7236d1e080e4936be2dc3e326cec0af72acf9212a7e1d060210e70a47e253523"}, {file = "pexpect-4.9.0.tar.gz", hash = "sha256:ee7d41123f3c9911050ea2c2dac107568dc43b2d3b0c7557a33212c398ead30f"}, @@ -2710,6 +2873,8 @@ version = "11.0.0" description = "Python Imaging Library (Fork)" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "pillow-11.0.0-cp310-cp310-macosx_10_10_x86_64.whl", hash = "sha256:6619654954dc4936fcff82db8eb6401d3159ec6be81e33c6000dfd76ae189947"}, {file = "pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:b3c5ac4bed7519088103d9450a1107f76308ecf91d6dabc8a33a2fcfb18d0fba"}, @@ -2793,7 +2958,7 @@ docs = ["furo", "olefile", "sphinx (>=8.1)", "sphinx-copybutton", "sphinx-inline fpx = ["olefile"] mic = ["olefile"] tests = ["check-manifest", "coverage", "defusedxml", "markdown2", "olefile", "packaging", "pyroma", "pytest", "pytest-cov", "pytest-timeout"] -typing = ["typing-extensions"] +typing = ["typing-extensions ; python_version < \"3.10\""] xmp = ["defusedxml"] [[package]] @@ -2802,6 +2967,7 @@ version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, @@ -2818,6 +2984,7 @@ version = "1.5.0" description = "plugin and hook calling mechanisms for python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pluggy-1.5.0-py3-none-any.whl", hash = "sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669"}, {file = "pluggy-1.5.0.tar.gz", hash = "sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1"}, @@ -2833,6 +3000,7 @@ version = "0.21.0" description = "Python client for the Prometheus monitoring system." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "prometheus_client-0.21.0-py3-none-any.whl", hash = "sha256:4fa6b4dd0ac16d58bb587c04b1caae65b8c5043e85f778f42f5f632f6af2e166"}, {file = "prometheus_client-0.21.0.tar.gz", hash = "sha256:96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e"}, @@ -2847,6 +3015,7 @@ version = "3.0.48" description = "Library for building powerful interactive command lines in Python" optional = false python-versions = ">=3.7.0" +groups = ["dev"] files = [ {file = "prompt_toolkit-3.0.48-py3-none-any.whl", hash = "sha256:f49a827f90062e411f1ce1f854f2aedb3c23353244f8108b89283587397ac10e"}, {file = "prompt_toolkit-3.0.48.tar.gz", hash = "sha256:d6623ab0477a80df74e646bdbc93621143f5caf104206aa29294d53de1a03d90"}, @@ -2861,6 +3030,8 @@ version = "0.2.0" description = "Accelerated property cache" optional = false python-versions = ">=3.8" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, {file = 
"propcache-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:952e0d9d07609d9c5be361f33b0d6d650cd2bae393aabb11d9b719364521984b"}, @@ -2968,6 +3139,7 @@ version = "6.1.0" description = "Cross-platform lib for process and system monitoring in Python." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "dev", "gpu"] files = [ {file = "psutil-6.1.0-cp27-cp27m-macosx_10_9_x86_64.whl", hash = "sha256:ff34df86226c0227c52f38b919213157588a678d049688eded74c76c8ba4a5d0"}, {file = "psutil-6.1.0-cp27-cp27m-manylinux2010_i686.whl", hash = "sha256:c0e0c00aa18ca2d3b2b991643b799a15fc8f0563d2ebb6040f64ce8dc027b942"}, @@ -2987,6 +3159,7 @@ files = [ {file = "psutil-6.1.0-cp37-abi3-win_amd64.whl", hash = "sha256:a8fb3752b491d246034fa4d279ff076501588ce8cbcdbb62c32fd7a377d996be"}, {file = "psutil-6.1.0.tar.gz", hash = "sha256:353815f59a7f64cdaca1c0307ee13558a0512f6db064e92fe833784f08539c7a"}, ] +markers = {main = "extra == \"jobs\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.extras] dev = ["black", "check-manifest", "coverage", "packaging", "pylint", "pyperf", "pypinfo", "pytest-cov", "requests", "rstcheck", "ruff", "sphinx", "sphinx_rtd_theme", "toml-sort", "twine", "virtualenv", "wheel"] @@ -2998,6 +3171,8 @@ version = "0.7.0" description = "Run a subprocess in a pseudo terminal" optional = false python-versions = "*" +groups = ["dev"] +markers = "sys_platform != \"win32\" or os_name != \"nt\"" files = [ {file = "ptyprocess-0.7.0-py2.py3-none-any.whl", hash = "sha256:4b41f3967fce3af57cc7e94b888626c18bf37a083e3651ca8feeb66d492fef35"}, {file = "ptyprocess-0.7.0.tar.gz", hash = "sha256:5c5d0a3b48ceee0b48485e0c26037c0acd7d29765ca3fbb5cb3831d347423220"}, @@ -3009,6 +3184,7 @@ version = "0.2.3" description = "Safely evaluate AST nodes without side effects" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "pure_eval-0.2.3-py3-none-any.whl", hash = 
"sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0"}, {file = "pure_eval-0.2.3.tar.gz", hash = "sha256:5f4e983f40564c576c7c8635ae88db5956bb2229d7e9237d03b3c0b0190eaf42"}, @@ -3023,6 +3199,8 @@ version = "18.0.0" description = "Python library for Apache Arrow" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_arm64.whl", hash = "sha256:2333f93260674e185cfbf208d2da3007132572e56871f451ba1a556b45dae6e2"}, {file = "pyarrow-18.0.0-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:4c381857754da44326f3a49b8b199f7f87a51c2faacd5114352fc78de30d3aba"}, @@ -3077,6 +3255,7 @@ version = "2.12.1" description = "Python style guide checker" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pycodestyle-2.12.1-py2.py3-none-any.whl", hash = "sha256:46f0fb92069a7c28ab7bb558f05bfc0110dac69a0cd23c61ea0040283a9d78b3"}, {file = "pycodestyle-2.12.1.tar.gz", hash = "sha256:6838eae08bbce4f6accd5d5572075c63626a15ee3e6f842df996bf62f6d73521"}, @@ -3088,6 +3267,7 @@ version = "2.22" description = "C parser in Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, {file = "pycparser-2.22.tar.gz", hash = "sha256:491c8be9c040f5390f5bf44a5b07752bd07f56edf992381b05c701439eec10f6"}, @@ -3099,6 +3279,7 @@ version = "3.2.0" description = "passive checker of Python programs" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pyflakes-3.2.0-py2.py3-none-any.whl", hash = "sha256:84b5be138a2dfbb40689ca07e2152deb896a65c3a3e24c251c5c62489568074a"}, {file = "pyflakes-3.2.0.tar.gz", hash = "sha256:1c61603ff154621fb2a9172037d84dca3500def8c8b630657d1701f026f8af3f"}, @@ -3110,6 +3291,7 @@ version = "2.18.0" description = "Pygments is a syntax highlighting package written in 
Python." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pygments-2.18.0-py3-none-any.whl", hash = "sha256:b8e6aca0523f3ab76fee51799c488e38782ac06eafcf95e7ba832985c8e7b13a"}, {file = "pygments-2.18.0.tar.gz", hash = "sha256:786ff802f32e91311bff3889f6e9a86e81505fe99f2735bb6d60ae0c5004f199"}, @@ -3124,6 +3306,8 @@ version = "2.8.0" description = "JSON Web Token implementation in Python" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "PyJWT-2.8.0-py3-none-any.whl", hash = "sha256:59127c392cc44c2da5bb3192169a91f429924e17aff6534d70fdc02ab3e04320"}, {file = "PyJWT-2.8.0.tar.gz", hash = "sha256:57e28d156e3d5c10088e0c68abb90bfac3df82b40a71bd0daa20c65ccd5c23de"}, @@ -3141,6 +3325,8 @@ version = "3.2.0" description = "pyparsing module - Classes and methods to define and execute parsing grammars" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"jobs\"" files = [ {file = "pyparsing-3.2.0-py3-none-any.whl", hash = "sha256:93d9577b88da0bbea8cc8334ee8b918ed014968fd2ec383e868fb8afb1ccef84"}, {file = "pyparsing-3.2.0.tar.gz", hash = "sha256:cbf74e27246d595d9a74b186b810f6fbb86726dbf3b9532efb343f6d7294fe9c"}, @@ -3151,13 +3337,14 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pyright" -version = "1.1.386" +version = "1.1.399" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ - {file = "pyright-1.1.386-py3-none-any.whl", hash = "sha256:7071ac495593b2258ccdbbf495f1a5c0e5f27951f6b429bed4e8b296eb5cd21d"}, - {file = "pyright-1.1.386.tar.gz", hash = "sha256:8e9975e34948ba5f8e07792a9c9d2bdceb2c6c0b61742b068d2229ca2bc4a9d9"}, + {file = "pyright-1.1.399-py3-none-any.whl", hash = "sha256:55f9a875ddf23c9698f24208c764465ffdfd38be6265f7faf9a176e1dc549f3b"}, + {file = "pyright-1.1.399.tar.gz", hash = "sha256:439035d707a36c3d1b443aec980bc37053fbda88158eded24b8eedcf1c7b7a1b"}, ] 
[package.dependencies] @@ -3176,6 +3363,7 @@ version = "8.3.3" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "pytest-8.3.3-py3-none-any.whl", hash = "sha256:a6853c7375b2663155079443d2e45de913a911a11d669df02a50814944db57b2"}, {file = "pytest-8.3.3.tar.gz", hash = "sha256:70b98107bd648308a7952b06e6ca9a50bc660be218d53c257cc1fc94fda10181"}, @@ -3198,6 +3386,7 @@ version = "4.1.0" description = "Pytest plugin for measuring coverage." optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pytest-cov-4.1.0.tar.gz", hash = "sha256:3904b13dfbfec47f003b8e77fd5b589cd11904a21ddf1ab38a64f204d6a10ef6"}, {file = "pytest_cov-4.1.0-py3-none-any.whl", hash = "sha256:6ba70b9e97e69fcc3fb45bfeab2d0a138fb65c4d0d6a41ef33983ad114be8c3a"}, @@ -3216,6 +3405,7 @@ version = "2.9.0.post0" description = "Extensions to the standard Python datetime module" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7" +groups = ["main", "dev"] files = [ {file = "python-dateutil-2.9.0.post0.tar.gz", hash = "sha256:37dd54208da7e1cd875388217d5e00ebd4179249f90fb72437e91a35459a0ad3"}, {file = "python_dateutil-2.9.0.post0-py2.py3-none-any.whl", hash = "sha256:a8b2bc7bffae282281c8140a97d3aa9c14da0b136dfe83f850eea9a5f7470427"}, @@ -3230,6 +3420,7 @@ version = "2.0.7" description = "A python library adding a json log formatter" optional = false python-versions = ">=3.6" +groups = ["dev"] files = [ {file = "python-json-logger-2.0.7.tar.gz", hash = "sha256:23e7ec02d34237c5aa1e29a070193a4ea87583bb4e7f8fd06d3de8264c4b2e1c"}, {file = "python_json_logger-2.0.7-py3-none-any.whl", hash = "sha256:f380b826a991ebbe3de4d897aeec42760035ac760345e57b812938dc8b35e2bd"}, @@ -3241,6 +3432,7 @@ version = "2024.2" description = "World timezone definitions, modern and historical" optional = false python-versions = "*" +groups = ["main", "dev"] files = [ {file = "pytz-2024.2-py2.py3-none-any.whl", hash = 
"sha256:31c7c1817eb7fae7ca4b8c7ee50c72f93aa2dd863de768e1ef4245d426aa0725"}, {file = "pytz-2024.2.tar.gz", hash = "sha256:2aa355083c50a0f93fa581709deac0c9ad65cca8a9e9beac660adcbd493c798a"}, @@ -3252,6 +3444,8 @@ version = "308" description = "Python for Window Extensions" optional = false python-versions = "*" +groups = ["dev"] +markers = "sys_platform == \"win32\" and platform_python_implementation != \"PyPy\"" files = [ {file = "pywin32-308-cp310-cp310-win32.whl", hash = "sha256:796ff4426437896550d2981b9c2ac0ffd75238ad9ea2d3bfa67a1abd546d262e"}, {file = "pywin32-308-cp310-cp310-win_amd64.whl", hash = "sha256:4fc888c59b3c0bef905ce7eb7e2106a07712015ea1c8234b703a088d46110e8e"}, @@ -3279,6 +3473,8 @@ version = "2.0.14" description = "Pseudo terminal support for Windows from Python." optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "os_name == \"nt\"" files = [ {file = "pywinpty-2.0.14-cp310-none-win_amd64.whl", hash = "sha256:0b149c2918c7974f575ba79f5a4aad58bd859a52fa9eb1296cc22aa412aa411f"}, {file = "pywinpty-2.0.14-cp311-none-win_amd64.whl", hash = "sha256:cf2a43ac7065b3e0dc8510f8c1f13a75fb8fde805efa3b8cff7599a1ef497bc7"}, @@ -3294,6 +3490,7 @@ version = "6.0.2" description = "YAML parser and emitter for Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "gpu"] files = [ {file = "PyYAML-6.0.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0a9a2848a5b7feac301353437eb7d5957887edbf81d56e903999a75a3d743086"}, {file = "PyYAML-6.0.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:29717114e51c84ddfba879543fb232a6ed60086602313ca38cce623c1d62cfbf"}, @@ -3349,6 +3546,7 @@ files = [ {file = "PyYAML-6.0.2-cp39-cp39-win_amd64.whl", hash = "sha256:39693e1f8320ae4f43943590b49779ffb98acb81f788220ea932a6b6c51004d8"}, {file = "pyyaml-6.0.2.tar.gz", hash = "sha256:d584d9ec91ad65861cc08d42e834324ef890a082e591037abe114850ff7bbc3e"}, ] +markers = {main = "extra == \"huggingface\" or extra == \"jobs\"", gpu = "sys_platform == 
\"win32\" or sys_platform == \"linux\""} [[package]] name = "pyzmq" @@ -3356,6 +3554,7 @@ version = "26.2.0" description = "Python bindings for 0MQ" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_15_universal2.whl", hash = "sha256:ddf33d97d2f52d89f6e6e7ae66ee35a4d9ca6f36eda89c24591b0c40205a3629"}, {file = "pyzmq-26.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:dacd995031a01d16eec825bf30802fceb2c3791ef24bcce48fa98ce40918c27b"}, @@ -3477,10 +3676,12 @@ version = "0.35.1" description = "JSON Referencing + Python" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, {file = "referencing-0.35.1.tar.gz", hash = "sha256:25b42124a6c8b632a425174f24087783efb348a6f1e0008e63cd4466fedf703c"}, ] +markers = {main = "extra == \"jobs\""} [package.dependencies] attrs = ">=22.2.0" @@ -3492,6 +3693,7 @@ version = "2024.9.11" description = "Alternative regular expression module, to replace re." optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:0e12c481ad92d129c78f13a2a3662317e46ee7ef96c94fd332e1c29131875b7d"}, @@ -3595,10 +3797,12 @@ version = "2.32.3" description = "Python HTTP for Humans." 
optional = false python-versions = ">=3.8" +groups = ["main", "dev", "gpu"] files = [ {file = "requests-2.32.3-py3-none-any.whl", hash = "sha256:70761cfe03c773ceb22aa2f671b4757976145175cdfca038c02654d061d6dcc6"}, {file = "requests-2.32.3.tar.gz", hash = "sha256:55365417734eb18255590a9ff9eb97e9e1da868d4ccd6402399eaf68af20a760"}, ] +markers = {main = "extra == \"huggingface\" or extra == \"jobs\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.dependencies] certifi = ">=2017.4.17" @@ -3616,6 +3820,7 @@ version = "0.1.4" description = "A pure python RFC3339 validator" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["dev"] files = [ {file = "rfc3339_validator-0.1.4-py2.py3-none-any.whl", hash = "sha256:24f6ec1eda14ef823da9e36ec7113124b39c04d50a4d3d3a3c2859577e7791fa"}, {file = "rfc3339_validator-0.1.4.tar.gz", hash = "sha256:138a2abdf93304ad60530167e51d2dfb9549521a836871b88d7f4695d0022f6b"}, @@ -3630,6 +3835,7 @@ version = "0.1.1" description = "Pure python rfc3986 validator" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +groups = ["dev"] files = [ {file = "rfc3986_validator-0.1.1-py2.py3-none-any.whl", hash = "sha256:2f235c432ef459970b4306369336b9d5dbdda31b510ca1e327636e01f528bfa9"}, {file = "rfc3986_validator-0.1.1.tar.gz", hash = "sha256:3d44bde7921b3b9ec3ae4e3adca370438eccebc676456449b145d533b240d055"}, @@ -3641,6 +3847,7 @@ version = "0.20.0" description = "Python bindings to Rust's persistent data structures (rpds)" optional = false python-versions = ">=3.8" +groups = ["main", "dev"] files = [ {file = "rpds_py-0.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3ad0fda1635f8439cde85c700f964b23ed5fc2d28016b32b9ee5fe30da5c84e2"}, {file = "rpds_py-0.20.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:9bb4a0d90fdb03437c109a17eade42dfbf6190408f29b2744114d11586611d6f"}, @@ -3746,6 +3953,7 @@ files = [ {file = 
"rpds_py-0.20.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:fdfc3a892927458d98f3d55428ae46b921d1f7543b89382fdb483f5640daaec8"}, {file = "rpds_py-0.20.0.tar.gz", hash = "sha256:d72a210824facfdaf8768cf2d7ca25a042c30320b3020de2fa04640920d4e121"}, ] +markers = {main = "extra == \"jobs\""} [[package]] name = "s3transfer" @@ -3753,6 +3961,7 @@ version = "0.10.3" description = "An Amazon S3 Transfer Manager" optional = false python-versions = ">=3.8" +groups = ["main"] files = [ {file = "s3transfer-0.10.3-py3-none-any.whl", hash = "sha256:263ed587a5803c6c708d3ce44dc4dfedaab4c1a32e8329bab818933d79ddcf5d"}, {file = "s3transfer-0.10.3.tar.gz", hash = "sha256:4f50ed74ab84d474ce614475e0b8d5047ff080810aac5d01ea25231cfc944b0c"}, @@ -3770,6 +3979,8 @@ version = "0.0.53" description = "SacreMoses" optional = false python-versions = "*" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "sacremoses-0.0.53.tar.gz", hash = "sha256:43715868766c643b35de4b8046cce236bfe59a7fa88b25eaf6ddf02bacf53a7a"}, ] @@ -3787,6 +3998,7 @@ version = "0.4.5" description = "" optional = false python-versions = ">=3.7" +groups = ["main", "gpu"] files = [ {file = "safetensors-0.4.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:a63eaccd22243c67e4f2b1c3e258b257effc4acd78f3b9d397edc8cf8f1298a7"}, {file = "safetensors-0.4.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:23fc9b4ec7b602915cbb4ec1a7c1ad96d2743c322f20ab709e2c35d1b66dad27"}, @@ -3899,6 +4111,7 @@ files = [ {file = "safetensors-0.4.5-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:834001bed193e4440c4a3950a31059523ee5090605c907c66808664c932b549c"}, {file = "safetensors-0.4.5.tar.gz", hash = "sha256:d73de19682deabb02524b3d5d1f8b3aaba94c72f1bbfc7911b9b9d5d391c0310"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.extras] all = ["safetensors[jax]", "safetensors[numpy]", "safetensors[paddlepaddle]", "safetensors[pinned-tf]", 
"safetensors[quality]", "safetensors[testing]", "safetensors[torch]"] @@ -3919,15 +4132,16 @@ version = "1.8.3" description = "Send file to trash natively under Mac OS X, Windows and Linux" optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["dev"] files = [ {file = "Send2Trash-1.8.3-py3-none-any.whl", hash = "sha256:0c31227e0bd08961c7665474a3d1ef7193929fedda4233843689baa056be46c9"}, {file = "Send2Trash-1.8.3.tar.gz", hash = "sha256:b18e7a3966d99871aefeb00cfbcfdced55ce4871194810fc71f4aa484b953abf"}, ] [package.extras] -nativelib = ["pyobjc-framework-Cocoa", "pywin32"] -objc = ["pyobjc-framework-Cocoa"] -win32 = ["pywin32"] +nativelib = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\"", "pywin32 ; sys_platform == \"win32\""] +objc = ["pyobjc-framework-Cocoa ; sys_platform == \"darwin\""] +win32 = ["pywin32 ; sys_platform == \"win32\""] [[package]] name = "sentencepiece" @@ -3935,6 +4149,8 @@ version = "0.2.0" description = "SentencePiece python wrapper" optional = false python-versions = "*" +groups = ["main"] +markers = "extra == \"sentencepiece\"" files = [ {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:188779e1298a1c8b8253c7d3ad729cb0a9891e5cef5e5d07ce4592c54869e227"}, {file = "sentencepiece-0.2.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bed9cf85b296fa2b76fc2547b9cbb691a523864cebaee86304c43a7b4cb1b452"}, @@ -3997,19 +4213,20 @@ version = "75.3.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "setuptools-75.3.0-py3-none-any.whl", hash = "sha256:f2504966861356aa38616760c0f66568e535562374995367b4e69c7143cf6bcd"}, {file = "setuptools-75.3.0.tar.gz", hash = "sha256:fba5dd4d766e97be1b1681d98712680ae8f2f26d7881245f2ce9e40714f1a686"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = 
["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\"", "ruff (>=0.5.2) ; sys_platform != \"cygwin\""] +core = ["importlib-metadata (>=6) ; python_version < \"3.10\"", "importlib-resources (>=5.10.2) ; python_version < \"3.9\"", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=4.2.2)", "tomli (>=2.0.1) ; python_version < \"3.11\"", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] -test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21)", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", "pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] -type = ["importlib-metadata (>=7.0.2)", "jaraco.develop (>=7.21)", "mypy (==1.12.*)", "pytest-mypy"] +test = ["build[virtualenv] (>=1.0.3)", "filelock (>=3.4.0)", "ini2toml[lite] (>=0.14)", "jaraco.develop (>=7.21) ; python_version >= \"3.9\" and sys_platform != \"cygwin\"", "jaraco.envs (>=2.2)", "jaraco.path (>=3.2.0)", "jaraco.test (>=5.5)", "packaging (>=23.2)", "pip (>=19.1)", 
"pyproject-hooks (!=1.1)", "pytest (>=6,!=8.1.*)", "pytest-home (>=0.5)", "pytest-perf ; sys_platform != \"cygwin\"", "pytest-subprocess", "pytest-timeout", "pytest-xdist (>=3)", "tomli-w (>=1.0.0)", "virtualenv (>=13.0.0)", "wheel (>=0.44.0)"] +type = ["importlib-metadata (>=7.0.2) ; python_version < \"3.10\"", "jaraco.develop (>=7.21) ; sys_platform != \"cygwin\"", "mypy (==1.12.*)", "pytest-mypy"] [[package]] name = "sil-thot" @@ -4017,6 +4234,8 @@ version = "3.4.6" description = "A toolkit for statistical word alignment and machine translation" optional = false python-versions = "<4.0,>=3.7" +groups = ["main"] +markers = "extra == \"thot\"" files = [ {file = "sil_thot-3.4.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba4a3ac7310dc4e51f81483d21e4cc461ef803968647c05e7daad7dc6d973504"}, {file = "sil_thot-3.4.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a90640851f9b93c92f94c588814258503b5c06f7dfc7d8578b4b0222fa5be87"}, @@ -4054,6 +4273,7 @@ version = "1.16.0" description = "Python 2 and 3 compatibility utilities" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" +groups = ["main", "dev"] files = [ {file = "six-1.16.0-py2.py3-none-any.whl", hash = "sha256:8abb2f1d86890a2dfb989f9a77cfcfd3e47c2a354b01111771326f8aa26e0254"}, {file = "six-1.16.0.tar.gz", hash = "sha256:1e61c37477a1626458e36f7b1d82aa5c9b094fa4802892072e49de9c60c4c926"}, @@ -4065,6 +4285,7 @@ version = "1.3.1" description = "Sniff out which async library your code is running under" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "sniffio-1.3.1-py3-none-any.whl", hash = "sha256:2f6da418d1f1e0fddd844478f41680e794e6051915791a034ff65e5f100525a2"}, {file = "sniffio-1.3.1.tar.gz", hash = "sha256:f4324edc670a0f49750a81b895f35c3adb843cca46f0530f79fc1babb23789dc"}, @@ -4076,6 +4297,7 @@ version = "2.4.0" description = "Sorted Containers -- Sorted List, Sorted Dict, Sorted Set" optional = false python-versions = "*" +groups 
= ["main"] files = [ {file = "sortedcontainers-2.4.0-py2.py3-none-any.whl", hash = "sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0"}, {file = "sortedcontainers-2.4.0.tar.gz", hash = "sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88"}, @@ -4087,6 +4309,7 @@ version = "2.6" description = "A modern CSS selector implementation for Beautiful Soup." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "soupsieve-2.6-py3-none-any.whl", hash = "sha256:e72c4ff06e4fb6e4b5a9f0f55fe6e81514581fca1515028625d0f299c602ccc9"}, {file = "soupsieve-2.6.tar.gz", hash = "sha256:e2e68417777af359ec65daac1057404a3c8a5455bb8abc36f1a9866ab1a51abb"}, @@ -4098,6 +4321,7 @@ version = "0.6.3" description = "Extract data from python stack frames and tracebacks for informative displays" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "stack_data-0.6.3-py3-none-any.whl", hash = "sha256:d5558e0c25a4cb0853cddad3d77da9891a08cb85dd9f9f91b9f8cd66e511e695"}, {file = "stack_data-0.6.3.tar.gz", hash = "sha256:836a778de4fec4dcd1dcd89ed8abff8a221f58308462e1c4aa2a3cf30148f0b9"}, @@ -4117,6 +4341,8 @@ version = "1.13.3" description = "Computer algebra system (CAS) in Python" optional = false python-versions = ">=3.8" +groups = ["gpu"] +markers = "sys_platform == \"win32\" or sys_platform == \"linux\"" files = [ {file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"}, {file = "sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9"}, @@ -4134,6 +4360,7 @@ version = "0.18.1" description = "Tornado websocket backend for the Xterm.js Javascript terminal emulator library." 
optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "terminado-0.18.1-py3-none-any.whl", hash = "sha256:a4468e1b37bb318f8a86514f65814e1afc977cf29b3992a4500d9dd305dcceb0"}, {file = "terminado-0.18.1.tar.gz", hash = "sha256:de09f2c4b85de4765f7714688fff57d3e75bad1f909b589fde880460c753fd2e"}, @@ -4155,6 +4382,7 @@ version = "1.4.0" description = "A tiny CSS parser" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "tinycss2-1.4.0-py3-none-any.whl", hash = "sha256:3a49cf47b7675da0b15d0c6e1df8df4ebd96e9394bb905a5775adb0d884c5289"}, {file = "tinycss2-1.4.0.tar.gz", hash = "sha256:10c0972f6fc0fbee87c3edb76549357415e94548c1ae10ebccdea16fb404a9b7"}, @@ -4173,6 +4401,8 @@ version = "0.20.1" description = "" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "tokenizers-0.20.1-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:439261da7c0a5c88bda97acb284d49fbdaf67e9d3b623c0bfd107512d22787a9"}, {file = "tokenizers-0.20.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:03dae629d99068b1ea5416d50de0fea13008f04129cc79af77a2a6392792d93c"}, @@ -4290,6 +4520,8 @@ version = "2.0.2" description = "A lil' TOML parser" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version < \"3.11\"" files = [ {file = "tomli-2.0.2-py3-none-any.whl", hash = "sha256:2ebe24485c53d303f690b0ec092806a085f07af5a5aa1464f3931eec36caaa38"}, {file = "tomli-2.0.2.tar.gz", hash = "sha256:d46d457a85337051c36524bc5349dd91b1877838e2979ac5ced3e710ed8a60ed"}, @@ -4301,6 +4533,8 @@ version = "2.4.0" description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration" optional = false python-versions = ">=3.8.0" +groups = ["gpu"] +markers = "sys_platform == \"win32\" or sys_platform == \"linux\"" files = [ {file = "torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:4ed94583e244af51d6a8d28701ca5a9e02d1219e782f5a01dd401f90af17d8ac"}, 
{file = "torch-2.4.0-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:c4ca297b7bd58b506bfd6e78ffd14eb97c0e7797dcd7965df62f50bb575d8954"}, @@ -4354,6 +4588,7 @@ version = "6.4.1" description = "Tornado is a Python web framework and asynchronous networking library, originally developed at FriendFeed." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_universal2.whl", hash = "sha256:163b0aafc8e23d8cdc3c9dfb24c5368af84a81e3364745ccb4427669bf84aec8"}, {file = "tornado-6.4.1-cp38-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6d5ce3437e18a2b66fbadb183c1d3364fb03f2be71299e7d10dbeeb69f4b2a14"}, @@ -4374,10 +4609,12 @@ version = "4.66.6" description = "Fast, Extensible Progress Meter" optional = false python-versions = ">=3.7" +groups = ["main", "gpu"] files = [ {file = "tqdm-4.66.6-py3-none-any.whl", hash = "sha256:223e8b5359c2efc4b30555531f09e9f2f3589bcd7fdd389271191031b49b7a63"}, {file = "tqdm-4.66.6.tar.gz", hash = "sha256:4bdd694238bef1485ce839d67967ab50af8f9272aab687c0d7702a01da0be090"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [package.dependencies] colorama = {version = "*", markers = "platform_system == \"Windows\""} @@ -4394,6 +4631,7 @@ version = "5.14.3" description = "Traitlets Python configuration system" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "traitlets-5.14.3-py3-none-any.whl", hash = "sha256:b74e89e397b1ed28cc831db7aea759ba6640cb3de13090ca145426688ff1ac4f"}, {file = "traitlets-5.14.3.tar.gz", hash = "sha256:9ed0579d3502c94b4b3732ac120375cda96f923114522847de4b3bb98b96b6b7"}, @@ -4409,6 +4647,8 @@ version = "4.45.2" description = "State-of-the-art Machine Learning for JAX, PyTorch and TensorFlow" optional = false python-versions = ">=3.8.0" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "transformers-4.45.2-py3-none-any.whl", hash = 
"sha256:c551b33660cfc815bae1f9f097ecfd1e65be623f13c6ee0dda372bd881460210"}, {file = "transformers-4.45.2.tar.gz", hash = "sha256:72bc390f6b203892561f05f86bbfaa0e234aab8e927a83e62b9d92ea7e3ae101"}, @@ -4478,6 +4718,8 @@ version = "3.0.0" description = "A language and compiler for custom Deep Learning operations" optional = false python-versions = "*" +groups = ["gpu"] +markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and (sys_platform == \"win32\" or sys_platform == \"linux\")" files = [ {file = "triton-3.0.0-1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:e1efef76935b2febc365bfadf74bcb65a6f959a9872e5bddf44cc9e0adce1e1a"}, {file = "triton-3.0.0-1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.whl", hash = "sha256:5ce8520437c602fb633f1324cc3871c47bee3b67acf9756c1a66309b60e3216c"}, @@ -4500,6 +4742,7 @@ version = "2.9.0.20241003" description = "Typing stubs for python-dateutil" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "types-python-dateutil-2.9.0.20241003.tar.gz", hash = "sha256:58cb85449b2a56d6684e41aeefb4c4280631246a0da1a719bdbe6f3fb0317446"}, {file = "types_python_dateutil-2.9.0.20241003-py3-none-any.whl", hash = "sha256:250e1d8e80e7bbc3a6c99b907762711d1a1cdd00e978ad39cb5940f6f0a87f3d"}, @@ -4511,10 +4754,12 @@ version = "4.12.2" description = "Backported and Experimental Type Hints for Python 3.8+" optional = false python-versions = ">=3.8" +groups = ["main", "dev", "gpu"] files = [ {file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"}, {file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"}, ] +markers = {main = "extra == \"huggingface\"", gpu = "sys_platform == \"win32\" or sys_platform == \"linux\""} [[package]] name = "tzdata" @@ -4522,6 +4767,7 @@ version = "2024.2" description = "Provider of IANA time zone 
data" optional = false python-versions = ">=2" +groups = ["main", "dev"] files = [ {file = "tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd"}, {file = "tzdata-2024.2.tar.gz", hash = "sha256:7d85cc416e9382e69095b7bdf4afd9e3880418a2413feec7069d533d6b4e31cc"}, @@ -4533,6 +4779,7 @@ version = "1.3.0" description = "RFC 6570 URI Template Processor" optional = false python-versions = ">=3.7" +groups = ["dev"] files = [ {file = "uri-template-1.3.0.tar.gz", hash = "sha256:0e00f8eb65e18c7de20d595a14336e9f337ead580c70934141624b6d1ffdacc7"}, {file = "uri_template-1.3.0-py3-none-any.whl", hash = "sha256:a44a133ea12d44a0c0f06d7d42a52d71282e77e2f937d8abd5655b8d56fc1363"}, @@ -4547,14 +4794,15 @@ version = "1.26.20" description = "HTTP library with thread-safe connection pooling, file post, and more." optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,>=2.7" +groups = ["main", "dev", "gpu"] files = [ {file = "urllib3-1.26.20-py2.py3-none-any.whl", hash = "sha256:0ed14ccfbf1c30a9072c7ca157e4319b70d65f623e91e7b32fadb2853431016e"}, {file = "urllib3-1.26.20.tar.gz", hash = "sha256:40c2dc0c681e47eb8f90e7e27bf6ff7df2e677421fd46756da1161c39ca70d32"}, ] [package.extras] -brotli = ["brotli (==1.0.9)", "brotli (>=1.0.9)", "brotlicffi (>=0.8.0)", "brotlipy (>=0.6.0)"] -secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] +brotli = ["brotli (==1.0.9) ; os_name != \"nt\" and python_version < \"3\" and platform_python_implementation == \"CPython\"", "brotli (>=1.0.9) ; python_version >= \"3\" and platform_python_implementation == \"CPython\"", "brotlicffi (>=0.8.0) ; (os_name != \"nt\" or python_version >= \"3\") and platform_python_implementation != \"CPython\"", "brotlipy (>=0.6.0) ; os_name == \"nt\" and python_version < \"3\""] +secure = ["certifi", "cryptography (>=1.3.4)", "idna (>=2.0.0)", "ipaddress ; python_version 
== \"2.7\"", "pyOpenSSL (>=0.14)", "urllib3-secure-extra"] socks = ["PySocks (>=1.5.6,!=1.5.7,<2.0)"] [[package]] @@ -4563,6 +4811,7 @@ version = "0.2.13" description = "Measures the displayed width of unicode strings in a terminal" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "wcwidth-0.2.13-py2.py3-none-any.whl", hash = "sha256:3da69048e4540d84af32131829ff948f1e022c1c6bdb8d6102117aac784f6859"}, {file = "wcwidth-0.2.13.tar.gz", hash = "sha256:72ea0c06399eb286d978fdedb6923a9eb47e1c486ce63e9b4e64fc18303972b5"}, @@ -4574,6 +4823,7 @@ version = "24.8.0" description = "A library for working with the color formats defined by HTML and CSS." optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "webcolors-24.8.0-py3-none-any.whl", hash = "sha256:fc4c3b59358ada164552084a8ebee637c221e4059267d0f8325b3b560f6c7f0a"}, {file = "webcolors-24.8.0.tar.gz", hash = "sha256:08b07af286a01bcd30d583a7acadf629583d1f79bfef27dd2c2c5c263817277d"}, @@ -4589,6 +4839,7 @@ version = "0.5.1" description = "Character encoding aliases for legacy web content" optional = false python-versions = "*" +groups = ["dev"] files = [ {file = "webencodings-0.5.1-py2.py3-none-any.whl", hash = "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78"}, {file = "webencodings-0.5.1.tar.gz", hash = "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923"}, @@ -4600,6 +4851,7 @@ version = "1.8.0" description = "WebSocket client for Python with low level API options" optional = false python-versions = ">=3.8" +groups = ["dev"] files = [ {file = "websocket_client-1.8.0-py3-none-any.whl", hash = "sha256:17b44cc997f5c498e809b22cdf2d9c7a9e71c02c8cc2b6c56e7c2d1239bfa526"}, {file = "websocket_client-1.8.0.tar.gz", hash = "sha256:3239df9f44da632f96012472805d40a23281a991027ce11d2f45a6f24ac4c3da"}, @@ -4616,6 +4868,7 @@ version = "4.0.13" description = "Jupyter interactive widgets for Jupyter Notebook" optional = false python-versions = 
">=3.7" +groups = ["dev"] files = [ {file = "widgetsnbextension-4.0.13-py3-none-any.whl", hash = "sha256:74b2692e8500525cc38c2b877236ba51d34541e6385eeed5aec15a70f88a6c71"}, {file = "widgetsnbextension-4.0.13.tar.gz", hash = "sha256:ffcb67bc9febd10234a362795f643927f4e0c05d9342c727b65d2384f8feacb6"}, @@ -4627,6 +4880,8 @@ version = "3.5.0" description = "Python binding for xxHash" optional = false python-versions = ">=3.7" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "xxhash-3.5.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ece616532c499ee9afbb83078b1b952beffef121d989841f7f4b3dc5ac0fd212"}, {file = "xxhash-3.5.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3171f693dbc2cef6477054a665dc255d996646b4023fe56cb4db80e26f4cc520"}, @@ -4759,6 +5014,8 @@ version = "1.17.0" description = "Yet another URL library" optional = false python-versions = ">=3.9" +groups = ["main"] +markers = "extra == \"huggingface\"" files = [ {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:2d8715edfe12eee6f27f32a3655f38d6c7410deb482158c0b7d4b7fad5d07628"}, {file = "yarl-1.17.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1803bf2a7a782e02db746d8bd18f2384801bc1d108723840b25e065b116ad726"}, @@ -4855,17 +5112,19 @@ version = "3.20.2" description = "Backport of pathlib-compatible object wrapper for zip files" optional = false python-versions = ">=3.8" +groups = ["dev"] +markers = "python_version == \"3.9\"" files = [ {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, {file = "zipp-3.20.2.tar.gz", hash = "sha256:bc9eb26f4506fda01b81bcde0ca78103b6e62f991b381fec825435c836edbc29"}, ] [package.extras] -check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"] +check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1) ; sys_platform != \"cygwin\""] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker 
(>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] enabler = ["pytest-enabler (>=2.2)"] -test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] +test = ["big-O", "importlib-resources ; python_version < \"3.9\"", "jaraco.functools", "jaraco.itertools", "jaraco.test", "more-itertools", "pytest (>=6,!=8.1.*)", "pytest-ignore-flaky"] type = ["pytest-mypy"] [extras] @@ -4875,6 +5134,6 @@ sentencepiece = ["sentencepiece"] thot = ["sil-thot"] [metadata] -lock-version = "2.0" +lock-version = "2.1" python-versions = ">=3.9,<3.13" -content-hash = "b650f3e8499b348a527c5e5f0e89ba90e55fb7df93bb907cc8d8e5fdd6b63cb0" +content-hash = "d292103e26b41fd440528597df80a64661ef21afd6be8fd07a8c34521729ad65" diff --git a/pyproject.toml b/pyproject.toml index 822c5ee5..853dc368 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ pytest-cov = "^4.1.0" ipykernel = "^6.7.0" jupyter = "^1.0.0" pandas = "^2.0.3" -pyright = { extras = ["nodejs"], version = "^1.1.362" } +pyright = { extras = ["nodejs"], version = "^1.1.399" } decoy = "^2.1.0" pep8-naming = "^0.14.1" diff --git a/tests/corpora/test_update_usfm_parser_handler.py b/tests/corpora/test_update_usfm_parser_handler.py index c6cf8cea..c9ee6ba8 100644 --- a/tests/corpora/test_update_usfm_parser_handler.py +++ b/tests/corpora/test_update_usfm_parser_handler.py @@ -230,7 +230,7 @@ def test_paragraph_in_verse(): result_strip = r"""\id MAT \c 1 -\p +\p \v 1 Update 1 \s1 \v 2 From 1fa8e82dd82d0da0e3139f0944f02d945057f19d Mon Sep 17 00:00:00 2001 From: John Lambert Date: Fri, 11 Apr 2025 16:21:50 -0400 Subject: [PATCH 07/11] Respond to reviewer comments Pass marker type (embed, style) to update block --- .../paratext_project_terms_parser_base.py | 2 ++ machine/corpora/scripture_embed.py | 16 +++++++++++ .../scripture_ref_usfm_parser_handler.py | 28 ++++++------------- machine/corpora/scripture_update_block.py | 18 ++++++------ 
.../corpora/scripture_update_block_handler.py | 3 +- machine/corpora/scripture_update_element.py | 21 +++++++++++++- machine/corpora/update_usfm_parser_handler.py | 3 +- 7 files changed, 59 insertions(+), 32 deletions(-) create mode 100644 machine/corpora/scripture_embed.py diff --git a/machine/corpora/paratext_project_terms_parser_base.py b/machine/corpora/paratext_project_terms_parser_base.py index 00496443..3245c953 100644 --- a/machine/corpora/paratext_project_terms_parser_base.py +++ b/machine/corpora/paratext_project_terms_parser_base.py @@ -1,3 +1,5 @@ +from __future__ import annotations + import re from abc import ABC, abstractmethod from collections import defaultdict diff --git a/machine/corpora/scripture_embed.py b/machine/corpora/scripture_embed.py new file mode 100644 index 00000000..cc4a64f6 --- /dev/null +++ b/machine/corpora/scripture_embed.py @@ -0,0 +1,16 @@ +from typing import Optional + +EMBED_PART_START_CHAR_STYLES = ("f", "x", "z") +EMBED_STYLES = ("f", "fe", "fig", "fm", "x") + + +def is_note_text(marker: Optional[str]) -> bool: + return marker == "ft" + + +def is_embed_part_style(marker: Optional[str]) -> bool: + return marker is not None and marker.startswith(EMBED_PART_START_CHAR_STYLES) + + +def is_embed_style(marker: Optional[str]) -> bool: + return marker is not None and marker.strip("*") in EMBED_STYLES diff --git a/machine/corpora/scripture_ref_usfm_parser_handler.py b/machine/corpora/scripture_ref_usfm_parser_handler.py index f9bd263d..5dc6783a 100644 --- a/machine/corpora/scripture_ref_usfm_parser_handler.py +++ b/machine/corpora/scripture_ref_usfm_parser_handler.py @@ -5,6 +5,7 @@ from ..scripture.verse_ref import VerseRef, are_overlapping_verse_ranges from .corpora_utils import merge_verse_ranges from .scripture_element import ScriptureElement +from .scripture_embed import EMBED_PART_START_CHAR_STYLES, is_embed_part_style, is_embed_style, is_note_text from .scripture_ref import ScriptureRef from .usfm_parser_handler import 
UsfmParserHandler from .usfm_parser_state import UsfmParserState @@ -18,10 +19,6 @@ class ScriptureTextType(Enum): NOTE_TEXT = auto() -EMBED_PART_START_CHAR_STYLES = ("f", "x", "z") -EMBED_STYLES = ("f", "fe", "fig", "fm", "x") - - class ScriptureRefUsfmParserHandler(UsfmParserHandler, ABC): def __init__(self) -> None: self._cur_verse_ref: VerseRef = VerseRef() @@ -152,27 +149,27 @@ def opt_break(self, state: UsfmParserState) -> None: def start_char( self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]] ) -> None: - if self._is_embed_part_style(marker) and self._in_note_text: + if is_embed_part_style(marker) and self._in_note_text: self._in_nested_embed = True # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment self._check_convert_verse_para_to_non_verse(state) - if self._is_embed_style(marker): + if is_embed_style(marker): self._in_embed = True self._start_embed_wrapper(state, marker) - if self._is_note_text(marker): + if is_note_text(marker): self._start_note_text_wrapper(state) def end_char( self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool ) -> None: - if self._is_embed_part_style(marker): + if is_embed_part_style(marker): if self._in_nested_embed: self._in_nested_embed = False elif self._is_note_text(marker): self._end_note_text_wrapper(state) - if self._is_embed_style(marker): + if is_embed_style(marker): self._end_embed(state, marker, attributes, closed) self._in_embed = False @@ -237,7 +234,7 @@ def _end_parent_element(self) -> None: self._cur_elements_stack.pop() def _end_embed_elements(self) -> None: - if self._cur_elements_stack and self._is_embed_style(self._cur_elements_stack[-1].name): + if self._cur_elements_stack and is_embed_style(self._cur_elements_stack[-1].name): self._cur_elements_stack.pop() def _create_verse_refs(self) -> List[ScriptureRef]: @@ -268,7 +265,7 @@ def 
_check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None self._start_non_verse_text_wrapper(state) def _is_in_embed(self, marker: Optional[str]) -> bool: - return self._in_embed or self._is_embed_style(marker) + return self._in_embed or is_embed_style(marker) def _is_in_nested_embed(self, marker: Optional[str]) -> bool: return self._in_nested_embed or ( @@ -277,12 +274,3 @@ def _is_in_nested_embed(self, marker: Optional[str]) -> bool: and marker[1] in EMBED_PART_START_CHAR_STYLES and marker != "fm" ) - - def _is_note_text(self, marker: Optional[str]) -> bool: - return marker == "ft" - - def _is_embed_part_style(self, marker: Optional[str]) -> bool: - return marker is not None and marker.startswith(EMBED_PART_START_CHAR_STYLES) and marker != "fm" - - def _is_embed_style(self, marker: Optional[str]) -> bool: - return marker is not None and marker.strip("*") in EMBED_STYLES diff --git a/machine/corpora/scripture_update_block.py b/machine/corpora/scripture_update_block.py index afb9e75a..b4c7e290 100644 --- a/machine/corpora/scripture_update_block.py +++ b/machine/corpora/scripture_update_block.py @@ -1,14 +1,18 @@ from __future__ import annotations from .scripture_ref import ScriptureRef -from .scripture_update_element import ScriptureUpdateElement, ScriptureUpdateElementType +from .scripture_update_element import ( + ScriptureUpdateElement, + ScriptureUpdateElementType, + create_non_text_scripture_element, +) from .usfm_token import UsfmToken, UsfmTokenType class ScriptureUpdateBlock: def __init__(self) -> None: - self._ref: ScriptureRef = ScriptureRef() + self.ref: ScriptureRef = ScriptureRef() self._elements: list[ScriptureUpdateElement] = [] @property @@ -29,21 +33,19 @@ def add_token(self, token: UsfmToken, marked_for_removal: bool = False) -> None: ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal) ) else: - self._elements.append(ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, [token], 
marked_for_removal)) + self._elements.append(create_non_text_scripture_element([token], marked_for_removal)) def add_tokens(self, tokens: list[UsfmToken], marked_for_removal: bool = False) -> None: if len(tokens) == 0: return - self._elements.append( - ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, tokens.copy(), marked_for_removal) - ) + self._elements.append(create_non_text_scripture_element(tokens, marked_for_removal)) def update_ref(self, ref: ScriptureRef) -> None: - self._ref = ref + self.ref = ref def clear(self) -> None: self._elements.clear() - self._ref = ScriptureRef() + self.ref = ScriptureRef() def get_tokens(self) -> list[UsfmToken]: return [token for element in self._elements for token in element.get_tokens()] diff --git a/machine/corpora/scripture_update_block_handler.py b/machine/corpora/scripture_update_block_handler.py index bcbe8fb8..c520f50b 100644 --- a/machine/corpora/scripture_update_block_handler.py +++ b/machine/corpora/scripture_update_block_handler.py @@ -7,5 +7,4 @@ class ScriptureUpdateBlockHandler(ABC): - def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: - raise NotImplementedError("Must be implemented in subclass") + def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: ... 
diff --git a/machine/corpora/scripture_update_element.py b/machine/corpora/scripture_update_element.py index fe39d7e5..7296bd0a 100644 --- a/machine/corpora/scripture_update_element.py +++ b/machine/corpora/scripture_update_element.py @@ -3,12 +3,16 @@ from dataclasses import dataclass from enum import Enum, auto -from .usfm_token import UsfmToken +from .scripture_embed import is_embed_style +from .usfm_token import UsfmToken, UsfmTokenType class ScriptureUpdateElementType(Enum): EXISTING_TEXT = auto() INSERTED_TEXT = auto() + PARAGRAPH = auto() + EMBED = auto() + STYLE = auto() OTHER = auto() @@ -22,3 +26,18 @@ def get_tokens(self) -> list[UsfmToken]: if self.marked_for_removal: return [] return self.tokens + + +def create_non_text_scripture_element( + tokens: list[UsfmToken], marked_for_removal: bool = False +) -> ScriptureUpdateElement: + tokens = tokens.copy() + # Determine if it is a Paragraph, style, embed or other + if len(tokens) == 0 or tokens[0].marker is None: + return ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, [], marked_for_removal) + if tokens[0].type == UsfmTokenType.PARAGRAPH: + return ScriptureUpdateElement(ScriptureUpdateElementType.PARAGRAPH, tokens, marked_for_removal) + if is_embed_style(tokens[0].marker): + return ScriptureUpdateElement(ScriptureUpdateElementType.EMBED, tokens, marked_for_removal) + else: + return ScriptureUpdateElement(ScriptureUpdateElementType.STYLE, tokens, marked_for_removal) diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index ecdf0881..6ab7bf08 100644 --- a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -2,6 +2,7 @@ from typing import List, Optional, Sequence, Tuple, Union from ..scripture.verse_ref import VerseRef +from .scripture_embed import is_embed_part_style from .scripture_ref import ScriptureRef from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler from 
.scripture_update_block import ScriptureUpdateBlock @@ -345,7 +346,7 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True) in_embed: bool = self._is_in_embed(marker) in_nested_embed: bool = self._is_in_nested_embed(marker) - is_style_tag: bool = marker is not None and not self._is_embed_part_style(marker) + is_style_tag: bool = marker is not None and not is_embed_part_style(marker) existing_text = any( t.type == UsfmTokenType.TEXT and t.text From bb730bf36b3a41f8364dbdc9a4fd1a8fe3dc90fc Mon Sep 17 00:00:00 2001 From: John Lambert Date: Mon, 14 Apr 2025 12:53:27 -0400 Subject: [PATCH 08/11] Make last type - EMBED_BLOCK --- machine/corpora/scripture_update_block.py | 6 ++++-- machine/corpora/scripture_update_element.py | 1 + machine/corpora/update_usfm_parser_handler.py | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/machine/corpora/scripture_update_block.py b/machine/corpora/scripture_update_block.py index b4c7e290..72031e9f 100644 --- a/machine/corpora/scripture_update_block.py +++ b/machine/corpora/scripture_update_block.py @@ -35,10 +35,12 @@ def add_token(self, token: UsfmToken, marked_for_removal: bool = False) -> None: else: self._elements.append(create_non_text_scripture_element([token], marked_for_removal)) - def add_tokens(self, tokens: list[UsfmToken], marked_for_removal: bool = False) -> None: + def add_embed(self, tokens: list[UsfmToken], marked_for_removal: bool = False) -> None: if len(tokens) == 0: return - self._elements.append(create_non_text_scripture_element(tokens, marked_for_removal)) + self._elements.append( + ScriptureUpdateElement(ScriptureUpdateElementType.EMBED_BLOCK, tokens, marked_for_removal) + ) def update_ref(self, ref: ScriptureRef) -> None: self.ref = ref diff --git a/machine/corpora/scripture_update_element.py b/machine/corpora/scripture_update_element.py index 7296bd0a..754e48f6 100644 --- a/machine/corpora/scripture_update_element.py +++ 
b/machine/corpora/scripture_update_element.py @@ -11,6 +11,7 @@ class ScriptureUpdateElementType(Enum): EXISTING_TEXT = auto() INSERTED_TEXT = auto() PARAGRAPH = auto() + EMBED_BLOCK = auto() EMBED = auto() STYLE = auto() OTHER = auto() diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index 6ab7bf08..42a3b56c 100644 --- a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -426,7 +426,7 @@ def _process_embed_update_block(self) -> None: self._use_updated_embed_text() for handler in self._update_block_handlers: self._embed_update_block = handler.process_block(self._embed_update_block) - self._update_block.add_tokens(self._embed_update_block.get_tokens()) + self._update_block.add_embed(self._embed_update_block.get_tokens()) self._embed_update_block.clear() def _push_updated_text(self, tokens: List[UsfmToken]) -> None: From 5ff5e0752241c113b503e77d48bff9a17f4092f1 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Mon, 14 Apr 2025 15:15:20 -0400 Subject: [PATCH 09/11] linting --- machine/corpora/zip_paratext_project_text_updater.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/machine/corpora/zip_paratext_project_text_updater.py b/machine/corpora/zip_paratext_project_text_updater.py index 75e8ff02..b4dbd8bd 100644 --- a/machine/corpora/zip_paratext_project_text_updater.py +++ b/machine/corpora/zip_paratext_project_text_updater.py @@ -18,5 +18,5 @@ def _exists(self, file_name: StrPath) -> bool: def _open(self, file_name: StrPath) -> Optional[BinaryIO]: if file_name in self._archive.namelist(): - return BytesIO(self._archive.read(file_name)) + return BytesIO(self._archive.read(str(file_name))) return None From 9dba22bb0a57153341873b69528edc997e772e39 Mon Sep 17 00:00:00 2001 From: John Lambert Date: Tue, 22 Apr 2025 16:57:33 -0400 Subject: [PATCH 10/11] Reviewer updates --- machine/corpora/scripture_update_block_handler.py | 3 ++- 1 file changed, 
2 insertions(+), 1 deletion(-) diff --git a/machine/corpora/scripture_update_block_handler.py b/machine/corpora/scripture_update_block_handler.py index c520f50b..b3b1d654 100644 --- a/machine/corpora/scripture_update_block_handler.py +++ b/machine/corpora/scripture_update_block_handler.py @@ -1,10 +1,11 @@ from __future__ import annotations -from abc import ABC +from abc import ABC, abstractmethod from .scripture_update_block import ScriptureUpdateBlock class ScriptureUpdateBlockHandler(ABC): + @abstractmethod def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: ... From 402a03fcd17268d7669b618e72d911120c606505 Mon Sep 17 00:00:00 2001 From: Damien Daspit Date: Fri, 2 May 2025 16:48:38 -0500 Subject: [PATCH 11/11] Refactor update block --- .github/workflows/ci.yml | 2 +- machine/corpora/__init__.py | 11 +- .../paratext_project_text_updater_base.py | 8 +- machine/corpora/scripture_embed.py | 16 - .../scripture_ref_usfm_parser_handler.py | 116 ++-- machine/corpora/scripture_update_block.py | 53 -- .../corpora/scripture_update_block_handler.py | 11 - machine/corpora/scripture_update_element.py | 44 -- machine/corpora/update_usfm_parser_handler.py | 347 +++++------- machine/corpora/usfm_text_base.py | 20 +- machine/corpora/usfm_update_block.py | 55 ++ machine/corpora/usfm_update_block_element.py | 24 + machine/corpora/usfm_update_block_handler.py | 8 + poetry.lock | 14 +- pyproject.toml | 2 +- .../test_update_usfm_parser_handler.py | 533 ++++++++++++------ tests/corpora/test_usfm_file_text.py | 68 +-- tests/corpora/test_usfm_memory_text.py | 16 +- 18 files changed, 679 insertions(+), 669 deletions(-) delete mode 100644 machine/corpora/scripture_embed.py delete mode 100644 machine/corpora/scripture_update_block.py delete mode 100644 machine/corpora/scripture_update_block_handler.py delete mode 100644 machine/corpora/scripture_update_element.py create mode 100644 machine/corpora/usfm_update_block.py create mode 100644 
machine/corpora/usfm_update_block_element.py create mode 100644 machine/corpora/usfm_update_block_handler.py diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 044a4db5..860cd8dc 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -55,7 +55,7 @@ jobs: node-version: "14" - name: Lint with pyright run: | - npm install -g pyright@1.1.399 + npm install -g pyright@1.1.400 poetry run pyright - name: Test with pytest run: poetry run pytest --cov --cov-report=xml diff --git a/machine/corpora/__init__.py b/machine/corpora/__init__.py index 98773317..523604c0 100644 --- a/machine/corpora/__init__.py +++ b/machine/corpora/__init__.py @@ -61,6 +61,9 @@ from .usfm_tag import UsfmJustification, UsfmStyleAttribute, UsfmStyleType, UsfmTag, UsfmTextProperties, UsfmTextType from .usfm_token import UsfmAttribute, UsfmToken, UsfmTokenType from .usfm_tokenizer import RtlReferenceOrder, UsfmTokenizer +from .usfm_update_block import UsfmUpdateBlock +from .usfm_update_block_element import UsfmUpdateBlockElement, UsfmUpdateBlockElementType +from .usfm_update_block_handler import UsfmUpdateBlockHandler from .usx_file_alignment_collection import UsxFileAlignmentCollection from .usx_file_alignment_corpus import UsxFileAlignmentCorpus from .usx_file_text import UsxFileText @@ -92,8 +95,8 @@ "is_scripture", "lowercase", "MemoryAlignmentCollection", - "MemoryText", "MemoryStreamContainer", + "MemoryText", "MultiKeyRef", "nfc_normalize", "nfd_normalize", @@ -126,9 +129,9 @@ "TextRow", "TextRowFlags", "unescape_spaces", - "UpdateUsfmTextBehavior", "UpdateUsfmMarkerBehavior", "UpdateUsfmParserHandler", + "UpdateUsfmTextBehavior", "UsfmAttribute", "UsfmElementType", "UsfmFileText", @@ -148,6 +151,10 @@ "UsfmToken", "UsfmTokenizer", "UsfmTokenType", + "UsfmUpdateBlock", + "UsfmUpdateBlockElement", + "UsfmUpdateBlockElementType", + "UsfmUpdateBlockHandler", "UsxFileAlignmentCollection", "UsxFileAlignmentCorpus", "UsxFileText", diff --git 
a/machine/corpora/paratext_project_text_updater_base.py b/machine/corpora/paratext_project_text_updater_base.py index 8ba806a8..6ae04394 100644 --- a/machine/corpora/paratext_project_text_updater_base.py +++ b/machine/corpora/paratext_project_text_updater_base.py @@ -1,13 +1,13 @@ from abc import ABC, abstractmethod -from typing import BinaryIO, Optional, Sequence, Tuple, Union +from typing import BinaryIO, Iterable, Optional, Sequence, Tuple, Union from ..utils.typeshed import StrPath from .paratext_project_settings import ParatextProjectSettings from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase from .scripture_ref import ScriptureRef -from .scripture_update_block_handler import ScriptureUpdateBlockHandler from .update_usfm_parser_handler import UpdateUsfmMarkerBehavior, UpdateUsfmParserHandler, UpdateUsfmTextBehavior from .usfm_parser import parse_usfm +from .usfm_update_block_handler import UsfmUpdateBlockHandler class ParatextProjectTextUpdaterBase(ABC): @@ -26,8 +26,8 @@ def update_usfm( paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, - preserve_paragraph_styles: Optional[Sequence[str]] = None, - update_block_handlers: Optional[list[ScriptureUpdateBlockHandler]] = None, + preserve_paragraph_styles: Optional[Union[Iterable[str], str]] = None, + update_block_handlers: Optional[Iterable[UsfmUpdateBlockHandler]] = None, ) -> Optional[str]: file_name: str = self._settings.get_book_file_name(book_id) if not self._exists(file_name): diff --git a/machine/corpora/scripture_embed.py b/machine/corpora/scripture_embed.py deleted file mode 100644 index cc4a64f6..00000000 --- a/machine/corpora/scripture_embed.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Optional - -EMBED_PART_START_CHAR_STYLES = ("f", "x", "z") -EMBED_STYLES = ("f", "fe", "fig", 
"fm", "x") - - -def is_note_text(marker: Optional[str]) -> bool: - return marker == "ft" - - -def is_embed_part_style(marker: Optional[str]) -> bool: - return marker is not None and marker.startswith(EMBED_PART_START_CHAR_STYLES) - - -def is_embed_style(marker: Optional[str]) -> bool: - return marker is not None and marker.strip("*") in EMBED_STYLES diff --git a/machine/corpora/scripture_ref_usfm_parser_handler.py b/machine/corpora/scripture_ref_usfm_parser_handler.py index 5dc6783a..db9081b7 100644 --- a/machine/corpora/scripture_ref_usfm_parser_handler.py +++ b/machine/corpora/scripture_ref_usfm_parser_handler.py @@ -5,7 +5,6 @@ from ..scripture.verse_ref import VerseRef, are_overlapping_verse_ranges from .corpora_utils import merge_verse_ranges from .scripture_element import ScriptureElement -from .scripture_embed import EMBED_PART_START_CHAR_STYLES, is_embed_part_style, is_embed_style, is_note_text from .scripture_ref import ScriptureRef from .usfm_parser_handler import UsfmParserHandler from .usfm_parser_state import UsfmParserState @@ -16,7 +15,14 @@ class ScriptureTextType(Enum): NONE = auto() NONVERSE = auto() VERSE = auto() - NOTE_TEXT = auto() + EMBED = auto() + + +_EMBED_STYLES = {"f", "fe", "x", "fig"} + + +def _is_embed_style(marker: Optional[str]) -> bool: + return marker is not None and (marker.strip("*") in _EMBED_STYLES or marker.startswith("z")) class ScriptureRefUsfmParserHandler(UsfmParserHandler, ABC): @@ -25,18 +31,11 @@ def __init__(self) -> None: self._cur_elements_stack: List[ScriptureElement] = [] self._cur_text_type_stack: List[ScriptureTextType] = [] self._duplicate_verse: bool = False - self._in_preserved_paragraph: bool = False - self._in_embed: bool = False - self._in_note_text: bool = False - self._in_nested_embed: bool = False @property def _current_text_type(self) -> ScriptureTextType: return ScriptureTextType.NONE if len(self._cur_text_type_stack) == 0 else self._cur_text_type_stack[-1] - def _is_in_note_text(self) -> bool: - 
return self._in_note_text - def end_usfm(self, state: UsfmParserState) -> None: self._end_verse_text_wrapper(state) @@ -112,32 +111,6 @@ def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None: self._end_parent_element() - def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None: - self._in_embed = True - self._start_embed_wrapper(state, marker) - - def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None: - self._end_note_text_wrapper(state) - self._end_embed(state, marker, None, closed) - self._in_embed = False - - def _start_embed_wrapper(self, state: UsfmParserState, marker: str) -> None: - if self._cur_verse_ref.is_default: - self._update_verse_ref(state.verse_ref, marker) - - if not self._duplicate_verse: - self._check_convert_verse_para_to_non_verse(state) - self._next_element(marker) - - self._start_embed(state, self._create_non_verse_ref()) - - def _start_embed(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ... 
- - def _end_embed( - self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool - ) -> None: - pass - def text(self, state: UsfmParserState, text: str) -> None: # if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment if text.strip(): @@ -149,29 +122,23 @@ def opt_break(self, state: UsfmParserState) -> None: def start_char( self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]] ) -> None: - if is_embed_part_style(marker) and self._in_note_text: - self._in_nested_embed = True # if we hit a character marker in a verse paragraph and we aren't in a verse, then start a non-verse segment self._check_convert_verse_para_to_non_verse(state) - if is_embed_style(marker): - self._in_embed = True - self._start_embed_wrapper(state, marker) - - if is_note_text(marker): - self._start_note_text_wrapper(state) + if _is_embed_style(marker): + self._start_embed_text_wrapper(state, marker) def end_char( self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool ) -> None: - if is_embed_part_style(marker): - if self._in_nested_embed: - self._in_nested_embed = False - elif self._is_note_text(marker): - self._end_note_text_wrapper(state) - if is_embed_style(marker): - self._end_embed(state, marker, attributes, closed) - self._in_embed = False + if _is_embed_style(marker): + self._end_embed_text_wrapper(state) + + def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None: + self._start_embed_text_wrapper(state, marker) + + def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None: + self._end_embed_text_wrapper(state) def _start_verse_text(self, state: UsfmParserState, scripture_refs: Optional[Sequence[ScriptureRef]]) -> None: ... 
@@ -181,20 +148,9 @@ def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: Scripture def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ... - def _start_note_text_wrapper(self, state: UsfmParserState): - self._in_note_text = True - self._cur_text_type_stack.append(ScriptureTextType.NOTE_TEXT) - self._start_note_text(state) - - def _start_note_text(self, state: UsfmParserState) -> None: ... + def _start_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ... - def _end_note_text_wrapper(self, state: UsfmParserState): - if self._cur_text_type_stack and self._cur_text_type_stack[-1] == ScriptureTextType.NOTE_TEXT: - self._end_note_text(state, self._create_non_verse_ref()) - self._cur_text_type_stack.pop() - self._in_note_text = False - - def _end_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ... + def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: ... 
def _start_verse_text_wrapper(self, state: UsfmParserState) -> None: self._duplicate_verse = False @@ -222,6 +178,25 @@ def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None: self._cur_elements_stack.append(ScriptureElement(0, marker)) self._cur_verse_ref = verse_ref.copy() + def _start_embed_text_wrapper(self, state: UsfmParserState, marker: str) -> None: + if self._cur_verse_ref.is_default: + self._update_verse_ref(state.verse_ref, marker) + + if not self._duplicate_verse: + self._check_convert_verse_para_to_non_verse(state) + self._next_element(marker) + self._cur_text_type_stack.append(ScriptureTextType.EMBED) + self._start_embed_text(state, self._create_non_verse_ref()) + + def _end_embed_text_wrapper(self, state: UsfmParserState) -> None: + if ( + not self._duplicate_verse + and self._cur_text_type_stack + and self._cur_text_type_stack[-1] == ScriptureTextType.EMBED + ): + self._end_embed_text(state, self._create_non_verse_ref()) + self._cur_text_type_stack.pop() + def _next_element(self, marker: str) -> None: prev_elem: ScriptureElement = self._cur_elements_stack.pop() self._cur_elements_stack.append(ScriptureElement(prev_elem.position + 1, marker)) @@ -234,7 +209,7 @@ def _end_parent_element(self) -> None: self._cur_elements_stack.pop() def _end_embed_elements(self) -> None: - if self._cur_elements_stack and is_embed_style(self._cur_elements_stack[-1].name): + if self._cur_elements_stack and _is_embed_style(self._cur_elements_stack[-1].name): self._cur_elements_stack.pop() def _create_verse_refs(self) -> List[ScriptureRef]: @@ -263,14 +238,3 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None ): self._start_parent_element(para_tag.marker) self._start_non_verse_text_wrapper(state) - - def _is_in_embed(self, marker: Optional[str]) -> bool: - return self._in_embed or is_embed_style(marker) - - def _is_in_nested_embed(self, marker: Optional[str]) -> bool: - return self._in_nested_embed or ( - marker is not None - 
and marker.startswith("+") - and marker[1] in EMBED_PART_START_CHAR_STYLES - and marker != "fm" - ) diff --git a/machine/corpora/scripture_update_block.py b/machine/corpora/scripture_update_block.py deleted file mode 100644 index 72031e9f..00000000 --- a/machine/corpora/scripture_update_block.py +++ /dev/null @@ -1,53 +0,0 @@ -from __future__ import annotations - -from .scripture_ref import ScriptureRef -from .scripture_update_element import ( - ScriptureUpdateElement, - ScriptureUpdateElementType, - create_non_text_scripture_element, -) -from .usfm_token import UsfmToken, UsfmTokenType - - -class ScriptureUpdateBlock: - - def __init__(self) -> None: - self.ref: ScriptureRef = ScriptureRef() - self._elements: list[ScriptureUpdateElement] = [] - - @property - def elements(self) -> list[ScriptureUpdateElement]: - return self._elements - - def add_existing_text(self, token: UsfmToken, marked_for_removal: bool = False) -> None: - self._elements.append( - ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal) - ) - - def add_inserted_text(self, tokens: list[UsfmToken]) -> None: - self._elements.append(ScriptureUpdateElement(ScriptureUpdateElementType.INSERTED_TEXT, tokens.copy())) - - def add_token(self, token: UsfmToken, marked_for_removal: bool = False) -> None: - if token.type == UsfmTokenType.TEXT: - self._elements.append( - ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal) - ) - else: - self._elements.append(create_non_text_scripture_element([token], marked_for_removal)) - - def add_embed(self, tokens: list[UsfmToken], marked_for_removal: bool = False) -> None: - if len(tokens) == 0: - return - self._elements.append( - ScriptureUpdateElement(ScriptureUpdateElementType.EMBED_BLOCK, tokens, marked_for_removal) - ) - - def update_ref(self, ref: ScriptureRef) -> None: - self.ref = ref - - def clear(self) -> None: - self._elements.clear() - self.ref = ScriptureRef() - - def 
get_tokens(self) -> list[UsfmToken]: - return [token for element in self._elements for token in element.get_tokens()] diff --git a/machine/corpora/scripture_update_block_handler.py b/machine/corpora/scripture_update_block_handler.py deleted file mode 100644 index b3b1d654..00000000 --- a/machine/corpora/scripture_update_block_handler.py +++ /dev/null @@ -1,11 +0,0 @@ -from __future__ import annotations - -from abc import ABC, abstractmethod - -from .scripture_update_block import ScriptureUpdateBlock - - -class ScriptureUpdateBlockHandler(ABC): - - @abstractmethod - def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock: ... diff --git a/machine/corpora/scripture_update_element.py b/machine/corpora/scripture_update_element.py deleted file mode 100644 index 754e48f6..00000000 --- a/machine/corpora/scripture_update_element.py +++ /dev/null @@ -1,44 +0,0 @@ -from __future__ import annotations - -from dataclasses import dataclass -from enum import Enum, auto - -from .scripture_embed import is_embed_style -from .usfm_token import UsfmToken, UsfmTokenType - - -class ScriptureUpdateElementType(Enum): - EXISTING_TEXT = auto() - INSERTED_TEXT = auto() - PARAGRAPH = auto() - EMBED_BLOCK = auto() - EMBED = auto() - STYLE = auto() - OTHER = auto() - - -@dataclass -class ScriptureUpdateElement: - type: ScriptureUpdateElementType - tokens: list[UsfmToken] - marked_for_removal: bool = False - - def get_tokens(self) -> list[UsfmToken]: - if self.marked_for_removal: - return [] - return self.tokens - - -def create_non_text_scripture_element( - tokens: list[UsfmToken], marked_for_removal: bool = False -) -> ScriptureUpdateElement: - tokens = tokens.copy() - # Determine if it is a Paragraph, style, embed or other - if len(tokens) == 0 or tokens[0].marker is None: - return ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, [], marked_for_removal) - if tokens[0].type == UsfmTokenType.PARAGRAPH: - return 
ScriptureUpdateElement(ScriptureUpdateElementType.PARAGRAPH, tokens, marked_for_removal) - if is_embed_style(tokens[0].marker): - return ScriptureUpdateElement(ScriptureUpdateElementType.EMBED, tokens, marked_for_removal) - else: - return ScriptureUpdateElement(ScriptureUpdateElementType.STYLE, tokens, marked_for_removal) diff --git a/machine/corpora/update_usfm_parser_handler.py b/machine/corpora/update_usfm_parser_handler.py index 42a3b56c..8df9db91 100644 --- a/machine/corpora/update_usfm_parser_handler.py +++ b/machine/corpora/update_usfm_parser_handler.py @@ -1,16 +1,15 @@ from enum import Enum, auto -from typing import List, Optional, Sequence, Tuple, Union +from typing import Iterable, List, Optional, Sequence, Tuple, Union -from ..scripture.verse_ref import VerseRef -from .scripture_embed import is_embed_part_style from .scripture_ref import ScriptureRef -from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler -from .scripture_update_block import ScriptureUpdateBlock -from .scripture_update_block_handler import ScriptureUpdateBlockHandler +from .scripture_ref_usfm_parser_handler import ScriptureRefUsfmParserHandler, ScriptureTextType from .usfm_parser_state import UsfmParserState from .usfm_stylesheet import UsfmStylesheet from .usfm_token import UsfmAttribute, UsfmToken, UsfmTokenType from .usfm_tokenizer import UsfmTokenizer +from .usfm_update_block import UsfmUpdateBlock +from .usfm_update_block_element import UsfmUpdateBlockElementType +from .usfm_update_block_handler import UsfmUpdateBlockHandler class UpdateUsfmTextBehavior(Enum): @@ -25,7 +24,6 @@ class UpdateUsfmMarkerBehavior(Enum): class UpdateUsfmParserHandler(ScriptureRefUsfmParserHandler): - def __init__( self, rows: Optional[Sequence[Tuple[Sequence[ScriptureRef], str]]] = None, @@ -34,21 +32,20 @@ def __init__( paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, 
style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, - preserve_paragraph_styles: Optional[Sequence[str]] = None, - update_block_handlers: Optional[list[ScriptureUpdateBlockHandler]] = None, + preserve_paragraph_styles: Optional[Union[Iterable[str], str]] = None, + update_block_handlers: Optional[Iterable[UsfmUpdateBlockHandler]] = None, ) -> None: super().__init__() self._rows = rows or [] self._tokens: List[UsfmToken] = [] self._updated_text: List[UsfmToken] = [] - self._updated_embed_text: List[UsfmToken] = [] - self._update_block: ScriptureUpdateBlock = ScriptureUpdateBlock() - self._embed_update_block: ScriptureUpdateBlock = ScriptureUpdateBlock() + self._update_block_stack: list[UsfmUpdateBlock] = [] + self._embed_tokens: List[UsfmToken] = [] self._id_text = id_text if update_block_handlers is None: self._update_block_handlers = [] else: - self._update_block_handlers = update_block_handlers + self._update_block_handlers = list(update_block_handlers) if preserve_paragraph_styles is None: self._preserve_paragraph_styles = set(["r", "rem"]) elif isinstance(preserve_paragraph_styles, str): @@ -62,20 +59,18 @@ def __init__( self._replace_stack: List[bool] = [] self._row_index: int = 0 self._token_index: int = 0 - self._embed_updated: bool = False - self._embed_row_texts: List[str] = [] @property def tokens(self) -> List[UsfmToken]: return self._tokens def end_usfm(self, state: UsfmParserState) -> None: - self._collect_tokens(state) - self._process_update_block() + self._collect_updatable_tokens(state) super().end_usfm(state) def start_book(self, state: UsfmParserState, marker: str, code: str) -> None: - self._collect_tokens(state) + self._collect_readonly_tokens(state) + self._update_block_stack.append(UsfmUpdateBlock()) start_book_tokens: List[UsfmToken] = [] if self._id_text is not None: start_book_tokens.append(UsfmToken(UsfmTokenType.TEXT, text=self._id_text + " ")) @@ -84,7 +79,11 @@ def start_book(self, state: UsfmParserState, marker: 
str, code: str) -> None: super().start_book(state, marker, code) def end_book(self, state: UsfmParserState, marker: str) -> None: - self._process_update_block() + self._use_updated_text() + self._pop_new_tokens() + update_block = self._update_block_stack.pop() + self._tokens.extend(update_block.get_tokens()) + super().end_book(state, marker) def start_para( @@ -94,48 +93,35 @@ def start_para( unknown: bool, attributes: Optional[Sequence[UsfmAttribute]], ) -> None: - if marker in self._preserve_paragraph_styles: - self._in_preserved_paragraph = True - if ( state.is_verse_text and (self._has_new_text() or self._text_behavior == UpdateUsfmTextBehavior.STRIP_EXISTING) and self._paragraph_behavior == UpdateUsfmMarkerBehavior.STRIP ): - self._skip_tokens(state) + self._skip_updatable_tokens(state) else: - self._collect_tokens(state) + self._collect_updatable_tokens(state) super().start_para(state, marker, unknown, attributes) - def end_para(self, state: UsfmParserState, marker: str) -> None: - if not state.is_verse_text: - self._process_update_block() - super().end_para(state, marker) - self._in_preserved_paragraph = False - def start_row(self, state: UsfmParserState, marker: str) -> None: - self._collect_tokens(state) + self._collect_updatable_tokens(state) super().start_row(state, marker) def start_cell(self, state: UsfmParserState, marker: str, align: str, colspan: int) -> None: - self._collect_tokens(state) + self._collect_updatable_tokens(state) super().start_cell(state, marker, align, colspan) - def end_cell(self, state: UsfmParserState, marker: str) -> None: - self._collect_tokens(state) - super().end_cell(state, marker) - def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> None: - self._collect_tokens(state) + self._collect_updatable_tokens(state) super().start_sidebar(state, marker, category) def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None: if closed: - self._collect_tokens(state) + 
self._collect_updatable_tokens(state) super().end_sidebar(state, marker, closed) @@ -147,11 +133,12 @@ def chapter( alt_number: str, pub_number: str, ) -> None: - self._process_update_block() - self._collect_tokens(state) + self._use_updated_text() super().chapter(state, number, marker, alt_number, pub_number) + self._collect_readonly_tokens(state) + def milestone( self, state: UsfmParserState, @@ -159,8 +146,7 @@ def milestone( start_milestone: bool, attributes: Sequence[UsfmAttribute], ) -> None: - self._process_update_block() - self._collect_tokens(state) + self._collect_updatable_tokens(state) super().milestone(state, marker, start_milestone, attributes) @@ -172,11 +158,23 @@ def verse( alt_number: str, pub_number: str, ) -> None: - self._process_update_block() - self._collect_tokens(state) + self._use_updated_text() super().verse(state, number, marker, alt_number, pub_number) + self._collect_readonly_tokens(state) + + def start_note(self, state: UsfmParserState, marker: str, caller: str, category: str) -> None: + super().start_note(state, marker, caller, category) + + self._collect_updatable_tokens(state) + + def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None: + if closed: + self._collect_updatable_tokens(state) + + super().end_note(state, marker, closed) + def start_char( self, state: UsfmParserState, @@ -184,13 +182,17 @@ def start_char( unknown: bool, attributes: Sequence[UsfmAttribute], ) -> None: - if self._replace_with_new_tokens(state): - self._skip_tokens(state) - else: - self._collect_tokens(state) - super().start_char(state, marker_without_plus, unknown, attributes) + if self._current_text_type == ScriptureTextType.EMBED: + self._collect_updatable_tokens(state) + else: + self._replace_with_new_tokens(state) + if self._style_behavior == UpdateUsfmMarkerBehavior.STRIP: + self._skip_updatable_tokens(state) + else: + self._collect_updatable_tokens(state) + def end_char( self, state: UsfmParserState, @@ -198,98 +200,67 @@ def 
end_char( attributes: Sequence[UsfmAttribute], closed: bool, ) -> None: - - skip_tokens = self._replace_with_new_tokens(state, closed) if closed: - if skip_tokens: - self._skip_tokens(state) + if self._current_text_type == ScriptureTextType.EMBED: + self._collect_updatable_tokens(state) else: - self._collect_tokens(state) + self._replace_with_new_tokens(state) + if self._style_behavior == UpdateUsfmMarkerBehavior.STRIP: + self._skip_updatable_tokens(state) + else: + self._collect_updatable_tokens(state) super().end_char(state, marker, attributes, closed) - def _start_embed( - self, - state: UsfmParserState, - scripture_ref: ScriptureRef, - ) -> None: - self._embed_update_block.update_ref(scripture_ref) - self._embed_row_texts = self._advance_rows([scripture_ref]) - self._embed_updated = any(self._embed_row_texts) - - if self._replace_with_new_tokens(state): - self._skip_tokens(state) - else: - self._collect_tokens(state) - - def _end_embed( - self, state: UsfmParserState, marker: str, attributes: Sequence[UsfmAttribute], closed: bool - ) -> None: - skip_tokens = self._replace_with_new_tokens(state, closed) - if closed: - if skip_tokens: - self._skip_tokens(state) - else: - self._collect_tokens(state) - - self._process_embed_update_block() - self._embed_row_texts.clear() - self._embed_updated = False - - super()._end_embed(state, marker, attributes, closed) - def ref(self, state: UsfmParserState, marker: str, display: str, target: str) -> None: + super().ref(state, marker, display, target) + if self._replace_with_new_tokens(state): - self._skip_tokens(state) + self._skip_updatable_tokens(state) else: - self._collect_tokens(state) - - super().ref(state, marker, display, target) + self._collect_updatable_tokens(state) def text(self, state: UsfmParserState, text: str) -> None: super().text(state, text) if self._replace_with_new_tokens(state): - self._skip_tokens(state) + self._skip_updatable_tokens(state) else: - self._collect_tokens(state) + 
self._collect_updatable_tokens(state) def opt_break(self, state: UsfmParserState) -> None: - if self._replace_with_new_tokens(state): - self._skip_tokens(state) - else: - self._collect_tokens(state) - super().opt_break(state) - def unmatched(self, state: UsfmParserState, marker: str) -> None: if self._replace_with_new_tokens(state): - self._skip_tokens(state) + self._skip_updatable_tokens(state) else: - self._collect_tokens(state) + self._collect_updatable_tokens(state) + def unmatched(self, state: UsfmParserState, marker: str) -> None: super().unmatched(state, marker) + if self._replace_with_new_tokens(state): + self._skip_updatable_tokens(state) + else: + self._collect_updatable_tokens(state) + def _start_verse_text(self, state: UsfmParserState, scripture_refs: Sequence[ScriptureRef]) -> None: - row_texts: List[str] = self._advance_rows(scripture_refs) - self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) + self._start_update_block(scripture_refs) def _end_verse_text(self, state: UsfmParserState, scripture_refs: Sequence[ScriptureRef]) -> None: - self._pop_new_tokens() + self._end_update_block(scripture_refs) def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: - row_texts = self._advance_rows([scripture_ref]) - self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) + self._start_update_block([scripture_ref]) def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: - self._pop_new_tokens() + self._end_update_block([scripture_ref]) - def _start_note_text(self, state: UsfmParserState) -> None: - self._push_updated_embed_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in self._embed_row_texts]) - - def _end_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: - self._embed_row_texts.clear() - self._pop_new_tokens() + def _end_embed_text(self, state: UsfmParserState, scripture_ref: 
ScriptureRef) -> None: + self._update_block_stack[-1].add_embed( + self._embed_tokens, marked_for_removal=self._embed_behavior == UpdateUsfmMarkerBehavior.STRIP + ) + self._embed_tokens.clear() def get_usfm(self, stylesheet: Union[str, UsfmStylesheet] = "usfm.sty") -> str: if isinstance(stylesheet, str): @@ -321,113 +292,93 @@ def _advance_rows(self, seg_scr_refs: Sequence[ScriptureRef]) -> List[str]: self._row_index += 1 return row_texts - def _collect_tokens(self, state: UsfmParserState) -> None: + def _collect_updatable_tokens(self, state: UsfmParserState) -> None: self._use_updated_text() while self._token_index <= state.index + state.special_token_count: token = state.tokens[self._token_index] - if self._is_in_embed(token.marker): - self._embed_update_block.add_token(token) + if self._current_text_type == ScriptureTextType.EMBED: + self._embed_tokens.append(token) + elif ( + self._current_text_type != ScriptureTextType.NONE + or (state.para_tag is not None and state.para_tag.marker == "id") + ) and len(self._update_block_stack) > 0: + self._update_block_stack[-1].add_token(token) else: - self._update_block.add_token(token) + self._tokens.append(token) self._token_index += 1 - def _skip_tokens(self, state: UsfmParserState) -> None: + def _collect_readonly_tokens(self, state: UsfmParserState) -> None: while self._token_index <= state.index + state.special_token_count: token = state.tokens[self._token_index] - if self._is_in_embed(token.marker): - self._embed_update_block.add_token(token, marked_for_removal=True) + if len(self._update_block_stack) > 0: + self._update_block_stack[-1].add_token(token) else: - self._update_block.add_token(token, marked_for_removal=True) + self._tokens.append(token) self._token_index += 1 - self._token_index = state.index + 1 + state.special_token_count - def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True) -> bool: - marker: Optional[str] = state.token if state.token is None else state.token.marker - 
in_embed: bool = self._is_in_embed(marker) + def _skip_updatable_tokens(self, state: UsfmParserState) -> None: + while self._token_index <= state.index + state.special_token_count: + token = state.tokens[self._token_index] + if self._current_text_type != ScriptureTextType.NONE or ( + state.para_tag is not None and state.para_tag.marker == "id" + ): + if len(self._update_block_stack) > 0: + self._update_block_stack[-1].add_token(token, marked_for_removal=True) + self._token_index += 1 + self._token_index = state.index + 1 + state.special_token_count - in_nested_embed: bool = self._is_in_nested_embed(marker) - is_style_tag: bool = marker is not None and not is_embed_part_style(marker) + def _replace_with_new_tokens(self, state: UsfmParserState) -> bool: + if self._current_text_type == ScriptureTextType.EMBED: + return False existing_text = any( t.type == UsfmTokenType.TEXT and t.text for t in state.tokens[self._token_index : state.index + 1 + state.special_token_count] ) - use_new_tokens = ( - not self._is_in_preserved_paragraph(marker) - and ( - self._text_behavior == UpdateUsfmTextBehavior.STRIP_EXISTING - or ( - self._has_new_text() - and (not existing_text or self._text_behavior != UpdateUsfmTextBehavior.PREFER_EXISTING) - ) - ) - and ( - not in_embed - or ( - self._is_in_note_text() - and not in_nested_embed - and self._embed_behavior == UpdateUsfmMarkerBehavior.PRESERVE - ) - ) - ) - - if use_new_tokens: - if in_embed: - self._use_updated_embed_text() - else: - self._use_updated_text() - - if existing_text and ( - self._text_behavior == UpdateUsfmTextBehavior.PREFER_EXISTING or self._is_in_preserved_paragraph(marker) + use_new_tokens = True + if self._is_in_preserved_paragraph(state): + use_new_tokens = False + elif self._text_behavior != UpdateUsfmTextBehavior.STRIP_EXISTING and ( + not self._has_new_text() + or (existing_text and self._text_behavior == UpdateUsfmTextBehavior.PREFER_EXISTING) ): - if in_embed: - self._clear_updated_embed_text() - else: - 
self._clear_updated_text() + use_new_tokens = False - embed_in_new_verse_text = ( - any(self._replace_stack) or self._text_behavior == UpdateUsfmTextBehavior.STRIP_EXISTING - ) and in_embed - if embed_in_new_verse_text or self._embed_updated: - if self._embed_behavior == UpdateUsfmMarkerBehavior.STRIP: - self._clear_updated_embed_text() - return True - if not self._is_in_note_text() or in_nested_embed: - return False + if use_new_tokens: + self._use_updated_text() - skip_tokens = use_new_tokens and closed + clear_new_tokens = existing_text and ( + self._text_behavior == UpdateUsfmTextBehavior.PREFER_EXISTING or self._is_in_preserved_paragraph(state) + ) - if use_new_tokens and is_style_tag: - skip_tokens = self._style_behavior == UpdateUsfmMarkerBehavior.STRIP + if clear_new_tokens: + self._clear_updated_text() - return skip_tokens + return use_new_tokens def _has_new_text(self) -> bool: return any(self._replace_stack) and self._replace_stack[-1] - def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None: - super()._update_verse_ref(verse_ref, marker) - self._update_block.update_ref(ScriptureRef(verse_ref.copy())) - - def _create_non_verse_ref(self) -> ScriptureRef: - ref = super()._create_non_verse_ref() - self._update_block.update_ref(ref) - return ref + def _start_update_block(self, scripture_refs: Sequence[ScriptureRef]) -> None: + self._update_block_stack.append(UsfmUpdateBlock(scripture_refs)) + row_texts: List[str] = self._advance_rows(scripture_refs) + self._push_updated_text([UsfmToken(UsfmTokenType.TEXT, text=t + " ") for t in row_texts]) - def _process_update_block(self) -> None: + def _end_update_block(self, scripture_refs: Sequence[ScriptureRef]) -> None: self._use_updated_text() + self._pop_new_tokens() + update_block = self._update_block_stack.pop() + update_block.update_refs(scripture_refs) for handler in self._update_block_handlers: - self._update_block = handler.process_block(self._update_block) - 
self._tokens.extend(self._update_block.get_tokens()) - self._update_block.clear() - - def _process_embed_update_block(self) -> None: - self._use_updated_embed_text() - for handler in self._update_block_handlers: - self._embed_update_block = handler.process_block(self._embed_update_block) - self._update_block.add_embed(self._embed_update_block.get_tokens()) - self._embed_update_block.clear() + update_block = handler.process_block(update_block) + if ( + len(self._update_block_stack) > 0 + and self._update_block_stack[-1].elements[-1].type == UsfmUpdateBlockElementType.PARAGRAPH + ): + self._update_block_stack[-1].extend_last_element(update_block.get_tokens()) + else: + self._tokens.extend(update_block.get_tokens()) def _push_updated_text(self, tokens: List[UsfmToken]) -> None: self._replace_stack.append(any(tokens)) @@ -436,30 +387,14 @@ def _push_updated_text(self, tokens: List[UsfmToken]) -> None: def _use_updated_text(self) -> None: if self._updated_text: - self._update_block.add_inserted_text(self._updated_text) + self._update_block_stack[-1].add_text(self._updated_text) self._updated_text.clear() def _clear_updated_text(self) -> None: self._updated_text.clear() - def _push_updated_embed_text(self, tokens: List[UsfmToken]) -> None: - self._replace_stack.append(any(tokens)) - if tokens: - self._updated_embed_text.extend(tokens) - - def _use_updated_embed_text(self) -> None: - if self._updated_embed_text: - self._embed_update_block.add_inserted_text(self._updated_embed_text) - self._updated_embed_text.clear() - - def _clear_updated_embed_text(self) -> None: - self._updated_embed_text.clear() - - def _push_updated_text_as_previous(self) -> None: - self._replace_stack.append(self._replace_stack[-1]) - def _pop_new_tokens(self) -> None: self._replace_stack.pop() - def _is_in_preserved_paragraph(self, marker: Optional[str]) -> bool: - return self._in_preserved_paragraph or marker in self._preserve_paragraph_styles + def _is_in_preserved_paragraph(self, state: 
UsfmParserState) -> bool: + return state.para_tag is not None and state.para_tag.marker in self._preserve_paragraph_styles diff --git a/machine/corpora/usfm_text_base.py b/machine/corpora/usfm_text_base.py index 4556144d..c286c001 100644 --- a/machine/corpora/usfm_text_base.py +++ b/machine/corpora/usfm_text_base.py @@ -202,12 +202,8 @@ def text(self, state: UsfmParserState, text: str) -> None: text = text.lstrip() row_text += text elif len(text) > 0 and (self._current_text_type != ScriptureTextType.VERSE or state.is_verse_text): - is_embed_or_nested_dont_update = ( - state.token is not None - and self._is_in_embed(state.token.marker) - and (not self._is_in_note_text() or self._is_in_nested_embed(state.token.marker)) - ) - if is_embed_or_nested_dont_update: + # ignore embed text + if self._current_text_type == ScriptureTextType.EMBED: return if ( @@ -235,18 +231,6 @@ def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRe if self._text._include_all_text: self._rows.append(self._text._create_scripture_row(scripture_ref, text, self._sentence_start)) - def _start_note_text(self, state: UsfmParserState) -> None: - if self._text._include_markers: - return - self._row_texts_stack.append("") - - def _end_note_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None: - if self._text._include_markers: - return - text = self._row_texts_stack.pop() - if self._text._include_all_text: - self._rows.append(self._text._create_scripture_row(scripture_ref, text, self._sentence_start)) - def _output_marker(self, state: UsfmParserState) -> None: if not self._text._include_markers or len(self._row_texts_stack) == 0: return diff --git a/machine/corpora/usfm_update_block.py b/machine/corpora/usfm_update_block.py new file mode 100644 index 00000000..3d612d5f --- /dev/null +++ b/machine/corpora/usfm_update_block.py @@ -0,0 +1,55 @@ +from __future__ import annotations + +from typing import Iterable, Sequence + +from .scripture_ref import ScriptureRef 
from __future__ import annotations

from abc import ABC, abstractmethod
from dataclasses import dataclass
from enum import Enum, auto
from typing import Iterable, Sequence

# NOTE(review): in the project these types come from sibling modules and must be
# imported there; they are only referenced lazily (annotations / method bodies)
# in this reconstruction:
#   from .scripture_ref import ScriptureRef
#   from .usfm_token import UsfmToken, UsfmTokenType


class UsfmUpdateBlockElementType(Enum):
    """Classification of a run of USFM tokens inside an update block."""

    TEXT = auto()
    PARAGRAPH = auto()
    EMBED = auto()
    STYLE = auto()
    OTHER = auto()


@dataclass(frozen=True)
class UsfmUpdateBlockElement:
    """A run of USFM tokens plus a flag saying whether the run is to be dropped.

    ``get_tokens`` is the single point that honors ``marked_for_removal``:
    removed elements contribute no tokens to the rendered output.
    """

    # type: what kind of run this is (text / paragraph marker / embed / style / other)
    type: UsfmUpdateBlockElementType
    # tokens: the raw USFM tokens of the run, in document order
    tokens: list[UsfmToken]
    # marked_for_removal: when True the run is suppressed by get_tokens()
    marked_for_removal: bool = False

    def get_tokens(self) -> list[UsfmToken]:
        """Return a copy of the tokens, or an empty list when marked for removal."""
        return [] if self.marked_for_removal else self.tokens.copy()


class UsfmUpdateBlockHandler(ABC):
    """Hook invoked on each completed update block before it is emitted."""

    @abstractmethod
    def process_block(self, block: UsfmUpdateBlock) -> UsfmUpdateBlock: ...


class UsfmUpdateBlock:
    """Ordered collection of update elements covering one span of scripture refs.

    The block accumulates existing/inserted token runs while a verse (or
    non-verse) text span is parsed; handlers may rewrite it before the final
    token stream is produced via :meth:`get_tokens`.
    """

    def __init__(
        self,
        refs: Iterable[ScriptureRef] = (),
        elements: Iterable[UsfmUpdateBlockElement] = (),
    ) -> None:
        # Defaults are immutable tuples: the original used `= []`, which is a
        # shared mutable default (same list object across all calls). Harmless
        # only because list() copies it, but `()` removes the hazard outright.
        self._refs: list[ScriptureRef] = list(refs)
        self._elements: list[UsfmUpdateBlockElement] = list(elements)

    @property
    def refs(self) -> Sequence[ScriptureRef]:
        """Scripture references this block covers (e.g. each verse of a range)."""
        return self._refs

    @property
    def elements(self) -> Sequence[UsfmUpdateBlockElement]:
        """The accumulated elements, in document order."""
        return self._elements

    def add_text(self, tokens: Iterable[UsfmToken]) -> None:
        """Append a run of (typically newly inserted) text tokens."""
        self._elements.append(UsfmUpdateBlockElement(UsfmUpdateBlockElementType.TEXT, list(tokens)))

    def add_token(self, token: UsfmToken, marked_for_removal: bool = False) -> None:
        """Append a single existing token, classified by its USFM token type."""
        if token.type == UsfmTokenType.TEXT:
            element_type = UsfmUpdateBlockElementType.TEXT
        elif token.type == UsfmTokenType.PARAGRAPH:
            element_type = UsfmUpdateBlockElementType.PARAGRAPH
        elif token.type in (UsfmTokenType.CHARACTER, UsfmTokenType.END):
            element_type = UsfmUpdateBlockElementType.STYLE
        else:
            element_type = UsfmUpdateBlockElementType.OTHER
        self._elements.append(UsfmUpdateBlockElement(element_type, [token], marked_for_removal))

    def add_embed(self, tokens: Iterable[UsfmToken], marked_for_removal: bool = False) -> None:
        """Append a complete embed (footnote/cross-reference) as one element."""
        self._elements.append(
            UsfmUpdateBlockElement(UsfmUpdateBlockElementType.EMBED, list(tokens), marked_for_removal)
        )

    def extend_last_element(self, tokens: Iterable[UsfmToken]) -> None:
        """Append tokens to the most recently added element.

        Raises IndexError if the block is empty (same as the original behavior).
        """
        self._elements[-1].tokens.extend(tokens)

    def update_refs(self, refs: Iterable[ScriptureRef]) -> None:
        """Replace the block's scripture references."""
        self._refs = list(refs)

    def get_tokens(self) -> list[UsfmToken]:
        """Flatten all non-removed elements into a single token list."""
        return [token for element in self._elements for token in element.get_tokens()]

    def __eq__(self, other: object) -> bool:
        # Fix: the original accessed other._refs unconditionally, so comparing
        # against any non-UsfmUpdateBlock raised AttributeError instead of
        # returning NotImplemented (the data-model contract for __eq__).
        if not isinstance(other, UsfmUpdateBlock):
            return NotImplemented
        return self._refs == other._refs and self._elements == other._elements

    def copy(self) -> UsfmUpdateBlock:
        """Return a shallow copy.

        The refs/elements *lists* are copied (by __init__), but the element
        objects themselves are shared — mutating an element's token list via
        extend_last_element is visible through both blocks.
        """
        return UsfmUpdateBlock(self._refs, self._elements)
optional = false python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,>=2.7" +groups = ["main"] +markers = "sys_platform == \"linux\" and extra == \"jobs\"" files = [ {file = "Cython-3.0.12-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ba67eee9413b66dd9fbacd33f0bc2e028a2a120991d77b5fd4b19d0b1e4039b9"}, {file = "Cython-3.0.12-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bee2717e5b5f7d966d0c6e27d2efe3698c357aa4d61bb3201997c7a4f9fe485a"}, @@ -1008,6 +1010,8 @@ version = "2.0.0" description = "pip installable eflomal" optional = false python-versions = "*" +groups = ["main"] +markers = "sys_platform == \"linux\" and extra == \"jobs\"" files = [ {file = "eflomal-2.0.0.tar.gz", hash = "sha256:b71183dcf85bf4f59f44ef7a59f5268df1c17c0c8d8093f77b220025ffdba100"}, ] @@ -3337,14 +3341,14 @@ diagrams = ["jinja2", "railroad-diagrams"] [[package]] name = "pyright" -version = "1.1.399" +version = "1.1.400" description = "Command line wrapper for pyright" optional = false python-versions = ">=3.7" groups = ["dev"] files = [ - {file = "pyright-1.1.399-py3-none-any.whl", hash = "sha256:55f9a875ddf23c9698f24208c764465ffdfd38be6265f7faf9a176e1dc549f3b"}, - {file = "pyright-1.1.399.tar.gz", hash = "sha256:439035d707a36c3d1b443aec980bc37053fbda88158eded24b8eedcf1c7b7a1b"}, + {file = "pyright-1.1.400-py3-none-any.whl", hash = "sha256:c80d04f98b5a4358ad3a35e241dbf2a408eee33a40779df365644f8054d2517e"}, + {file = "pyright-1.1.400.tar.gz", hash = "sha256:b8a3ba40481aa47ba08ffb3228e821d22f7d391f83609211335858bf05686bdb"}, ] [package.dependencies] @@ -5136,4 +5140,4 @@ thot = ["sil-thot"] [metadata] lock-version = "2.1" python-versions = ">=3.9,<3.13" -content-hash = "d292103e26b41fd440528597df80a64661ef21afd6be8fd07a8c34521729ad65" +content-hash = "f56942f52a117fba35a5f43ee631c386ff95dd270301805558064d0228253624" diff --git a/pyproject.toml b/pyproject.toml index 853dc368..0498016f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -84,7 +84,7 @@ pytest-cov 
= "^4.1.0" ipykernel = "^6.7.0" jupyter = "^1.0.0" pandas = "^2.0.3" -pyright = { extras = ["nodejs"], version = "^1.1.399" } +pyright = { extras = ["nodejs"], version = "^1.1.400" } decoy = "^2.1.0" pep8-naming = "^0.14.1" diff --git a/tests/corpora/test_update_usfm_parser_handler.py b/tests/corpora/test_update_usfm_parser_handler.py index c9ee6ba8..086f2551 100644 --- a/tests/corpora/test_update_usfm_parser_handler.py +++ b/tests/corpora/test_update_usfm_parser_handler.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Sequence, Tuple +from typing import Iterable, List, Optional, Sequence, Tuple, Union from testutils.corpora_test_helpers import USFM_TEST_PROJECT_PATH, ignore_line_endings @@ -8,6 +8,9 @@ UpdateUsfmMarkerBehavior, UpdateUsfmParserHandler, UpdateUsfmTextBehavior, + UsfmUpdateBlock, + UsfmUpdateBlockElementType, + UsfmUpdateBlockHandler, parse_usfm, ) @@ -50,8 +53,8 @@ def test_get_usfm_strip_all_text() -> None: \r keep this reference \rem and this reference too \ip but remove this text -\v 1 Chapter \add one\add*, \p verse \f + \fr 2:1: \ft This is a \fm ∆\fm* footnote.\f*one. -\v 2 Chapter \add one\add*, \p verse \f + \fr 2:1: \ft This is a \fm ∆\fm* footnote.\f*two. +\v 1 Chapter \add one\add*, \p verse \f + \fr 1:1: \ft This is a \+bd ∆\+bd* footnote.\f*one. +\v 2 Chapter \add one\add*, \p verse \f + \fr 1:2: \ft This is a \+bd ∆\+bd* footnote.\f*two. 
\v 3 Verse 3 \v 4 Verse 4 """ @@ -66,18 +69,18 @@ def test_get_usfm_strip_all_text() -> None: ) result = r"""\id MAT -\c 1 -\r keep this reference -\rem and this reference too -\ip -\v 1 Update 1 \add \add* -\p \f + \fr 2:1: \ft \fm ∆\fm*\f* -\v 2 \add \add* -\p \f + \fr 2:1: \ft \fm ∆\fm*\f* -\v 3 Update 3 -\v 4 -""" - assess(target, result) + \c 1 + \r keep this reference + \rem and this reference too + \ip + \v 1 Update 1 \add \add* + \p \f + \fr 1:1: \ft This is a \+bd ∆\+bd* footnote.\f* + \v 2 \add \add* + \p \f + \fr 1:2: \ft This is a \+bd ∆\+bd* footnote.\f* + \v 3 Update 3 + \v 4 + """ + assert_usfm_equals(target, result) target = update_usfm( rows, @@ -98,7 +101,7 @@ def test_get_usfm_strip_all_text() -> None: \v 3 Update 3 \v 4 """ - assess(target, result) + assert_usfm_equals(target, result) def test_get_usfm_strip_paragraphs_preserve_paragraph_styles(): @@ -129,7 +132,7 @@ def test_get_usfm_strip_paragraphs_preserve_paragraph_styles(): \v 1 Update 1 """ - assess(target, result) + assert_usfm_equals(target, result) target_diff_paragraph = update_usfm( rows, @@ -146,7 +149,7 @@ def test_get_usfm_strip_paragraphs_preserve_paragraph_styles(): \v 1 Update 1 """ - assess(target_diff_paragraph, result_diff_paragraph) + assert_usfm_equals(target_diff_paragraph, result_diff_paragraph) def test_preserve_paragraphs(): @@ -177,7 +180,7 @@ def test_preserve_paragraphs(): \v 1 Update 1 """ - assess(target, result) + assert_usfm_equals(target, result) target_diff_paragraph = update_usfm( rows, usfm, text_behavior=UpdateUsfmTextBehavior.STRIP_EXISTING, preserve_paragraph_styles=("ip") @@ -190,7 +193,7 @@ def test_preserve_paragraphs(): \v 1 Update 1 """ - assess(target_diff_paragraph, result_diff_paragraph) + assert_usfm_equals(target_diff_paragraph, result_diff_paragraph) def test_paragraph_in_verse(): @@ -219,7 +222,7 @@ def test_paragraph_in_verse(): \p inner verse paragraph """ - assess(target, result) + assert_usfm_equals(target, result) target_strip = 
update_usfm( rows, @@ -236,7 +239,7 @@ def test_paragraph_in_verse(): \v 2 """ - assess(target_strip, result_strip) + assert_usfm_equals(target_strip, result_strip) def test_get_usfm_prefer_existing(): @@ -265,7 +268,7 @@ def test_get_usfm_prefer_existing(): \v 2 Update 2 \v 3 Other text """ - assess(target, result) + assert_usfm_equals(target, result) def test_get_usfm_prefer_rows(): @@ -298,25 +301,23 @@ def test_get_usfm_verse_strip_note() -> None: assert "\\v 1 First verse of the second chapter.\r\n" in target -def test_get_usfm_verse_replace_note() -> None: +def test_get_usfm_verse_replace_with_note() -> None: rows = [ ( scr_ref("MAT 1:1"), str("updated text"), ), - (scr_ref("MAT 1:1/1:f"), str("This is a new footnote.")), ] usfm = r"""\id MAT - Test \c 1 -\v 1 Chapter \add one\add*, verse \f + \fr 2:1: \ft This is a \fq quotation \ft and an \fqa alternative quotation\f*one. +\v 1 Chapter \add one\add*, verse \f + \fr 2:1: \ft This is a footnote.\f*one. """ target = update_usfm(rows, usfm) - # Only the first \ft marker is updated result = r"""\id MAT - Test \c 1 -\v 1 updated text \f + \fr 2:1: \ft This is a new footnote. \fq quotation \ft and an \fqa alternative quotation\f* +\v 1 updated text \f + \fr 2:1: \ft This is a footnote.\f* """ - assess(target, result) + assert_usfm_equals(target, result) def test_get_usfm_row_verse_segment() -> None: @@ -427,7 +428,7 @@ def test_get_usfm_merge_verse_segments() -> None: ] target = update_usfm(rows) assert target is not None - assert "\\v 2-3 Verse 2. Verse 2a. Verse 2b. \\fm ∆\\fm*\r\n" in target + assert "\\v 2-3 Verse 2. Verse 2a. 
Verse 2b.\r\n" in target def test_get_usfm_verse_opt_break() -> None: @@ -528,7 +529,7 @@ def test_get_usfm_nonverse_relaxed() -> None: def test_get_usfm_nonverse_sidebar() -> None: rows = [ ( - scr_ref("MAT 2:3/2:esb/1:ms"), + scr_ref("MAT 2:3/1:esb/1:ms"), str("The first paragraph of the sidebar."), ) ] @@ -556,7 +557,7 @@ def test_get_usfm_nonverse_table() -> None: def test_get_usfm_nonverse_optbreak() -> None: rows = [ ( - scr_ref("MAT 2:3/2:esb/2:p"), + scr_ref("MAT 2:3/1:esb/2:p"), str("The second paragraph of the sidebar."), ) ] @@ -589,20 +590,16 @@ def test_get_usfm_nonverse_skip_note() -> None: assert "\\ip The introductory paragraph.\r\n" in target -def test_get_usfm_nonverse_replace_note() -> None: +def test_get_usfm_nonverse_replace_with_note() -> None: rows = [ ( scr_ref("MAT 1:0/3:ip"), str("The introductory paragraph."), ), - ( - scr_ref("MAT 1:0/3:ip/1:fe"), - str("This is a new endnote."), - ), ] target = update_usfm(rows) assert target is not None - assert "\\ip The introductory paragraph. \\fe + \\ft This is a new endnote. \\fe*\r\n" in target + assert "\\ip The introductory paragraph. \\fe + \\ft This is an endnote.\\fe*\r\n" in target def test_get_usfm_verse_double_va_vp() -> None: @@ -673,77 +670,6 @@ def test_get_usfm_verse_pretranslations_before_text() -> None: assert "\\ip The introductory paragraph. 
\\fe + \\ft This is an endnote.\\fe*\r\n" in target -def test_embed_style_preservation() -> None: - rows = [ - ( - scr_ref("MAT 1:1"), - str("Update the greeting"), - ), - ( - scr_ref("MAT 1:1/1:f"), - str("Update the comment"), - ), - ( - scr_ref("MAT 1:2"), - str("Update the greeting only"), - ), - ( - scr_ref("MAT 1:3/1:f"), - str("Update the comment only"), - ), - ] - usfm = r"""\id MAT - Test -\c 1 -\v 1 Hello \f \fr 1.1 \ft Some \+bd note\+bd* \f*\bd World \bd* -\v 2 Good \f \fr 1.2 \ft Some other \+bd note\+bd* \f*\bd Morning \bd* -\v 3 Pleasant \f \fr 1.3 \ft A third \+bd note\+bd* \f*\bd Evening \bd* -""" - - target = update_usfm( - rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE, style_behavior=UpdateUsfmMarkerBehavior.PRESERVE - ) - result_pp = r"""\id MAT - Test -\c 1 -\v 1 Update the greeting \f \fr 1.1 \ft Update the comment \+bd \+bd*\f*\bd \bd* -\v 2 Update the greeting only \f \fr 1.2 \ft Some other \+bd note\+bd* \f*\bd \bd* -\v 3 Pleasant \f \fr 1.3 \ft Update the comment only \+bd \+bd*\f*\bd Evening \bd* -""" - assess(target, result_pp) - - target = update_usfm( - rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE, style_behavior=UpdateUsfmMarkerBehavior.STRIP - ) - result_ps = r"""\id MAT - Test -\c 1 -\v 1 Update the greeting \f \fr 1.1 \ft Update the comment \f* -\v 2 Update the greeting only \f \fr 1.2 \ft Some other \+bd note\+bd* \f* -\v 3 Pleasant \f \fr 1.3 \ft Update the comment only \f*\bd Evening \bd* -""" - assess(target, result_ps) - - target = update_usfm( - rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.STRIP, style_behavior=UpdateUsfmMarkerBehavior.PRESERVE - ) - result_sp = r"""\id MAT - Test -\c 1 -\v 1 Update the greeting \bd \bd* -\v 2 Update the greeting only \bd \bd* -\v 3 Pleasant \bd Evening \bd* -""" - assess(target, result_sp) - - target = update_usfm( - rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.STRIP, style_behavior=UpdateUsfmMarkerBehavior.STRIP - ) - result_ss = r"""\id MAT - 
Test -\c 1 -\v 1 Update the greeting -\v 2 Update the greeting only -\v 3 Pleasant \bd Evening \bd* -""" - assess(target, result_ss) - - def test_strip_paragraphs() -> None: rows = [ ( @@ -776,7 +702,7 @@ def test_strip_paragraphs() -> None: \p World """ - assess(target, result_p) + assert_usfm_equals(target, result_p) target = update_usfm(rows, usfm, paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP) result_s = r"""\id MAT - Test @@ -787,7 +713,7 @@ def test_strip_paragraphs() -> None: \v 2 Hello \p World """ - assess(target, result_s) + assert_usfm_equals(target, result_s) def test_preservation_raw_strings() -> None: @@ -807,7 +733,7 @@ def test_preservation_raw_strings() -> None: \c 1 \v 1 Update all in one row \f \fr 1.1 \ft Some note \f* """ - assess(target, result) + assert_usfm_equals(target, result) def test_beginning_of_verse_embed() -> None: @@ -827,27 +753,7 @@ def test_beginning_of_verse_embed() -> None: \c 1 \v 1 Updated text """ - assess(target, result) - - -def test_empty_note() -> None: - rows = [ - ( - scr_ref("MAT 1:1/1:f"), - str("Update the note"), - ) - ] - usfm = r"""\id MAT - Test -\c 1 -\v 1 Empty Note \f \fr 1.1 \ft \f* -""" - - target = update_usfm(rows, usfm) - result = r"""\id MAT - Test -\c 1 -\v 1 Empty Note \f \fr 1.1 \ft Update the note \f* -""" - assess(target, result) + assert_usfm_equals(target, result) def test_cross_reference_dont_update() -> None: @@ -867,10 +773,10 @@ def test_cross_reference_dont_update() -> None: \c 1 \v 1 Cross reference verse \x - \xo 2:3-4 \xt Cool Book 3:24 \xta The annotation \x* and more content. """ - assess(target, result) + assert_usfm_equals(target, result) -def test_preserve_fig_and_fm() -> None: +def test_preserve_fig() -> None: rows = [ ( scr_ref("MAT 1:1"), @@ -879,18 +785,18 @@ def test_preserve_fig_and_fm() -> None: ] usfm = r"""\id MAT - Test \c 1 -\v 1 initial text \fig stuff\fig* more text \fm * \fm* and more. +\v 1 initial text \fig stuff\fig* more text and more. 
""" target = update_usfm(rows, usfm) result = r"""\id MAT - Test \c 1 -\v 1 Update \fig stuff\fig*\fm * \fm* +\v 1 Update \fig stuff\fig* """ - assess(target, result) + assert_usfm_equals(target, result) -def test_nested_xt() -> None: +def test_note_explicit_end_markers() -> None: rows = [ ( scr_ref("MAT 1:1"), @@ -903,108 +809,335 @@ def test_nested_xt() -> None: ] usfm = r"""\id MAT - Test \c 1 -\v 1 initial text \f + \fr 15.8 \ft Text (\+xt reference\+xt*). And more.\f* and the end. +\v 1 initial text \f + \fr 2.4\fr* \fk The \+nd Lord\+nd*:\fk* \ft See \+nd Lord\+nd* in Word List.\ft*\f* and the end. """ target = update_usfm(rows, usfm) result = r"""\id MAT - Test \c 1 -\v 1 Update text \f + \fr 15.8 \ft Update note \+xt reference\+xt*\f* +\v 1 Update text \f + \fr 2.4\fr* \fk The \+nd Lord\+nd*:\fk* \ft See \+nd Lord\+nd* in Word List.\ft*\f* """ - assess(target, result) + assert_usfm_equals(target, result) target = update_usfm(rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.STRIP) result = r"""\id MAT - Test \c 1 \v 1 Update text """ - assess(target, result) + assert_usfm_equals(target, result) -def test_non_nested_xt() -> None: +def test_update_block_verse_preserve_paras() -> None: rows = [ ( scr_ref("MAT 1:1"), - str("Update text"), + str("Update 1"), ), + ] + usfm = r"""\id MAT - Test +\c 1 +\v 1 verse 1 \p inner verse paragraph +""" + + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm( + rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE, update_block_handlers=[update_block_handler] + ) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse 1 ", True), + (UsfmUpdateBlockElementType.PARAGRAPH, "\\p ", False), + (UsfmUpdateBlockElementType.TEXT, "inner verse paragraph ", 
True), + ) + + +def test_update_block_verse_strip_paras() -> None: + rows = [ ( - scr_ref("MAT 1:1/1:f"), - str("Update note"), + scr_ref("MAT 1:1"), + str("Update 1"), ), ] usfm = r"""\id MAT - Test \c 1 -\v 1 initial text \f + \fr 15.8 \ft Text \xt reference\f* and the end. +\v 1 verse 1 \p inner verse paragraph """ - target = update_usfm(rows, usfm) - result = r"""\id MAT - Test + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm( + rows, usfm, paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP, update_block_handlers=[update_block_handler] + ) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse 1 ", True), + (UsfmUpdateBlockElementType.PARAGRAPH, "\\p ", True), + (UsfmUpdateBlockElementType.TEXT, "inner verse paragraph ", True), + ) + + +def test_update_block_verse_range() -> None: + rows = [ + ( + scr_ref("MAT 1:1"), + str("Update 1"), + ), + ] + usfm = r"""\id MAT - Test \c 1 -\v 1 Update text \f + \fr 15.8 \ft Update note \xt reference\f* +\v 1-3 verse 1 through 3 """ - assess(target, result) - target = update_usfm(rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.STRIP) - result = r"""\id MAT - Test + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm( + rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE, update_block_handlers=[update_block_handler] + ) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + ["MAT 1:1", "MAT 1:2", "MAT 1:3"], + (UsfmUpdateBlockElementType.OTHER, "\\v 1-3 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse 1 through 3 ", True), + ) + + +def test_update_block_footnote_preserve_embeds() 
-> None: + rows = [ + ( + scr_ref("MAT 1:1"), + str("Update 1"), + ), + ] + usfm = r"""\id MAT - Test \c 1 -\v 1 Update text +\v 1 verse\f \fr 1.1 \ft Some note \f* 1 """ - assess(target, result) + + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm( + rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.PRESERVE, update_block_handlers=[update_block_handler] + ) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse", True), + (UsfmUpdateBlockElementType.EMBED, "\\f \\fr 1.1 \\ft Some note \\f*", False), + (UsfmUpdateBlockElementType.TEXT, " 1 ", True), + ) -def test_multiple_ft_only_update_first() -> None: +def test_update_block_footnote_strip_embeds() -> None: rows = [ ( scr_ref("MAT 1:1"), - str("Update text"), + str("Update 1"), ), + ] + usfm = r"""\id MAT - Test +\c 1 +\v 1 verse\f \fr 1.1 \ft Some note \f* 1 +""" + + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm(rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.STRIP, update_block_handlers=[update_block_handler]) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse", True), + (UsfmUpdateBlockElementType.EMBED, "\\f \\fr 1.1 \\ft Some note \\f*", True), + (UsfmUpdateBlockElementType.TEXT, " 1 ", True), + ) + + +def test_update_block_nonverse() -> None: + rows = [ ( - scr_ref("MAT 1:1/1:f"), - str("Update note"), + scr_ref("MAT 1:0/1:s"), + str("Updated section Header"), ), ] usfm = r"""\id MAT - Test +\s Section header \c 1 -\v 1 initial text \f + \fr 15.8 \ft first 
note \ft second note\f* and the end. +\v 1 verse 1 """ - target = update_usfm(rows, usfm) - result = r"""\id MAT - Test + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm(rows, usfm, update_block_handlers=[update_block_handler]) + + assert len(update_block_handler.blocks) == 2 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:0/1:s", + (UsfmUpdateBlockElementType.TEXT, "Updated section Header ", False), + (UsfmUpdateBlockElementType.TEXT, "Section header ", True), + ) + + +def test_update_block_verse_preserve_styles() -> None: + rows = [ + ( + scr_ref("MAT 1:1"), + str("Update 1"), + ), + ] + usfm = r"""\id MAT - Test \c 1 -\v 1 Update text \f + \fr 15.8 \ft Update note \ft second note\f* +\v 1 verse \bd 1\bd* """ - assess(target, result) - target = update_usfm(rows, usfm, embed_behavior=UpdateUsfmMarkerBehavior.STRIP) - result = r"""\id MAT - Test + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm( + rows, usfm, style_behavior=UpdateUsfmMarkerBehavior.PRESERVE, update_block_handlers=[update_block_handler] + ) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse ", True), + (UsfmUpdateBlockElementType.STYLE, "\\bd ", False), + (UsfmUpdateBlockElementType.TEXT, "1", True), + (UsfmUpdateBlockElementType.STYLE, "\\bd*", False), + (UsfmUpdateBlockElementType.TEXT, " ", True), + ) + + +def test_update_block_verse_strip_styles() -> None: + rows = [ + ( + scr_ref("MAT 1:1"), + str("Update 1"), + ), + ] + usfm = r"""\id MAT - Test \c 1 -\v 1 Update text +\v 1 verse \bd 1\bd* """ - assess(target, result) + + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm(rows, usfm, style_behavior=UpdateUsfmMarkerBehavior.STRIP, 
update_block_handlers=[update_block_handler]) + + assert len(update_block_handler.blocks) == 1 + update_block = update_block_handler.blocks[0] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "verse ", True), + (UsfmUpdateBlockElementType.STYLE, "\\bd ", True), + (UsfmUpdateBlockElementType.TEXT, "1", True), + (UsfmUpdateBlockElementType.STYLE, "\\bd*", True), + (UsfmUpdateBlockElementType.TEXT, " ", True), + ) -def test_implicitly_closed_char_style() -> None: +def test_update_block_verse_section_header() -> None: rows = [ ( scr_ref("MAT 1:1"), - str("Update text"), - ) + str("Update 1"), + ), ] usfm = r"""\id MAT - Test \c 1 -\v 1 Verse \bd one. -\c 2 -\v 1 Verse one. +\p +\v 1 Verse 1 +\s Section header +\p +\v 2 Verse 2 """ - target = update_usfm(rows, usfm) - result = r"""\id MAT - Test + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm(rows, usfm, update_block_handlers=[update_block_handler]) + + assert len(update_block_handler.blocks) == 4 + update_block = update_block_handler.blocks[0] + assert_update_block_equals(update_block, "MAT 1:0/1:p") + update_block = update_block_handler.blocks[1] + assert_update_block_equals(update_block, "MAT 1:1/1:s", (UsfmUpdateBlockElementType.TEXT, "Section header ", False)) + update_block = update_block_handler.blocks[2] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Verse 1 ", True), + (UsfmUpdateBlockElementType.PARAGRAPH, "\\s Section header ", False), + (UsfmUpdateBlockElementType.PARAGRAPH, "\\p ", False), + ) + update_block = update_block_handler.blocks[3] + assert_update_block_equals( + update_block, + "MAT 1:2", + (UsfmUpdateBlockElementType.OTHER, "\\v 2 ", False), + 
(UsfmUpdateBlockElementType.TEXT, "Verse 2 ", False), + ) + + +def test_update_block_verse_section_header_in_verse() -> None: + rows = [ + ( + scr_ref("MAT 1:1"), + str("Update 1"), + ), + ] + usfm = r"""\id MAT - Test \c 1 -\v 1 Update text -\c 2 -\v 1 Verse one. +\p +\v 1 Beginning of verse +\s Section header +\p end of verse """ - assess(target, result) + + update_block_handler = TestUsfmUpdateBlockHandler() + update_usfm(rows, usfm, update_block_handlers=[update_block_handler]) + + assert len(update_block_handler.blocks) == 3 + update_block = update_block_handler.blocks[0] + assert_update_block_equals(update_block, "MAT 1:0/1:p") + update_block = update_block_handler.blocks[1] + assert_update_block_equals(update_block, "MAT 1:1/1:s", (UsfmUpdateBlockElementType.TEXT, "Section header ", False)) + update_block = update_block_handler.blocks[2] + assert_update_block_equals( + update_block, + "MAT 1:1", + (UsfmUpdateBlockElementType.OTHER, "\\v 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Update 1 ", False), + (UsfmUpdateBlockElementType.TEXT, "Beginning of verse ", True), + (UsfmUpdateBlockElementType.PARAGRAPH, "\\s Section header ", False), + (UsfmUpdateBlockElementType.PARAGRAPH, "\\p ", False), + (UsfmUpdateBlockElementType.TEXT, "end of verse ", True), + ) def scr_ref(*refs: str) -> List[ScriptureRef]: @@ -1019,7 +1152,8 @@ def update_usfm( paragraph_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE, style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP, - preserve_paragraph_styles: Optional[Sequence[str]] = None, + preserve_paragraph_styles: Optional[Iterable[str]] = None, + update_block_handlers: Optional[Iterable[UsfmUpdateBlockHandler]] = None, ) -> Optional[str]: if source is None: updater = FileParatextProjectTextUpdater(USFM_TEST_PROJECT_PATH) @@ -1032,17 +1166,25 @@ def update_usfm( embed_behavior, style_behavior, 
preserve_paragraph_styles, + update_block_handlers, ) else: source = source.strip().replace("\r\n", "\n") + "\r\n" updater = UpdateUsfmParserHandler( - rows, id_text, text_behavior, paragraph_behavior, embed_behavior, style_behavior, preserve_paragraph_styles + rows, + id_text, + text_behavior, + paragraph_behavior, + embed_behavior, + style_behavior, + preserve_paragraph_styles, + update_block_handlers, ) parse_usfm(source, updater) return updater.get_usfm() -def assess(target: Optional[str], truth: str) -> None: +def assert_usfm_equals(target: Optional[str], truth: str) -> None: assert target is not None for target_line, truth_line in zip(target.split("\n"), truth.split("\n")): assert target_line.strip() == truth_line.strip() @@ -1051,3 +1193,26 @@ def assess(target: Optional[str], truth: str) -> None: def read_usfm() -> str: with (USFM_TEST_PROJECT_PATH / "41MATTes.SFM").open("r", encoding="utf-8-sig", newline="\r\n") as file: return file.read() + + +def assert_update_block_equals( + block: UsfmUpdateBlock, + expected_ref: Union[str, Iterable[str]], + *expected_elements: tuple[UsfmUpdateBlockElementType, str, bool], +) -> None: + assert block.refs == [ScriptureRef.parse(expected_ref)] if isinstance(expected_ref, str) else list(expected_ref) + assert len(block.elements) == len(expected_elements) + for element, [expected_type, expected_usfm, expected_marked_for_removal] in zip(block.elements, expected_elements): + assert element.type == expected_type + assert "".join(token.to_usfm() for token in element.tokens) == expected_usfm + assert element.marked_for_removal == expected_marked_for_removal + + +class TestUsfmUpdateBlockHandler(UsfmUpdateBlockHandler): + def __init__(self): + self.blocks: list[UsfmUpdateBlock] = [] + + def process_block(self, block: UsfmUpdateBlock) -> UsfmUpdateBlock: + new_block = block.copy() + self.blocks.append(new_block) + return new_block diff --git a/tests/corpora/test_usfm_file_text.py b/tests/corpora/test_usfm_file_text.py index 
e046d71d..7c40597f 100644 --- a/tests/corpora/test_usfm_file_text.py +++ b/tests/corpora/test_usfm_file_text.py @@ -66,7 +66,7 @@ def test_get_rows_nonempty_text_all_text() -> None: assert text is not None rows = list(text) - assert len(rows) == 52 + assert len(rows) == 48 assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/1:h", corpus.versification) assert rows[0].text == "Matthew" @@ -77,56 +77,44 @@ def test_get_rows_nonempty_text_all_text() -> None: assert scripture_ref(rows[2]) == ScriptureRef.parse("MAT 1:0/3:ip", corpus.versification) assert rows[2].text == "An introduction to Matthew" - assert scripture_ref(rows[3]) == ScriptureRef.parse("MAT 1:0/3:ip/1:fe", corpus.versification) - assert rows[3].text == "This is an endnote." - - assert scripture_ref(rows[4]) == ScriptureRef.parse("Mat 1:0/4:p", corpus.versification) - assert rows[4].text == "MAT 1 Here is another paragraph." - - assert scripture_ref(rows[7]) == ScriptureRef.parse("MAT 1:0/7:weirdtaglookingthing", corpus.versification) - assert rows[7].text == "that is not an actual tag." + assert scripture_ref(rows[3]) == ScriptureRef.parse("Mat 1:0/4:p", corpus.versification) + assert rows[3].text == "MAT 1 Here is another paragraph." - assert scripture_ref(rows[8]) == ScriptureRef.parse("MAT 1:0/8:s", corpus.versification) - assert rows[8].text == "Chapter One" + assert scripture_ref(rows[6]) == ScriptureRef.parse("MAT 1:0/7:weirdtaglookingthing", corpus.versification) + assert rows[6].text == "that is not an actual tag." - assert scripture_ref(rows[10]) == ScriptureRef.parse("MAT 1:1/1:f", corpus.versification) - assert rows[10].text == "This is a footnote for v1." + assert scripture_ref(rows[7]) == ScriptureRef.parse("MAT 1:0/8:s", corpus.versification) + assert rows[7].text == "Chapter One" - assert scripture_ref(rows[12]) == ScriptureRef.parse("MAT 1:2/1:f", corpus.versification) - assert rows[12].text == "This is a footnote for v2." 
+ assert scripture_ref(rows[16]) == ScriptureRef.parse("MAT 2:0/1:tr/1:tc1", corpus.versification) + assert rows[16].text == "Row one, column one." - assert scripture_ref(rows[19]) == ScriptureRef.parse("MAT 2:0/1:tr/1:tc1", corpus.versification) - assert rows[19].text == "Row one, column one." + assert scripture_ref(rows[17]) == ScriptureRef.parse("MAT 2:0/1:tr/2:tc2", corpus.versification) + assert rows[17].text == "Row one, column two." - assert scripture_ref(rows[20]) == ScriptureRef.parse("MAT 2:0/1:tr/2:tc2", corpus.versification) - assert rows[20].text == "Row one, column two." + assert scripture_ref(rows[18]) == ScriptureRef.parse("MAT 2:0/2:tr/1:tc1", corpus.versification) + assert rows[18].text == "Row two, column one." - assert scripture_ref(rows[21]) == ScriptureRef.parse("MAT 2:0/2:tr/1:tc1", corpus.versification) - assert rows[21].text == "Row two, column one." + assert scripture_ref(rows[19]) == ScriptureRef.parse("MAT 2:0/2:tr/2:tc2", corpus.versification) + assert rows[19].text == "Row two, column two." - assert scripture_ref(rows[22]) == ScriptureRef.parse("MAT 2:0/2:tr/2:tc2", corpus.versification) - assert rows[22].text == "Row two, column two." - - assert scripture_ref(rows[23]) == ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification) - assert rows[23].text == "Chapter Two" - - assert scripture_ref(rows[24]) == ScriptureRef.parse("MAT 2:0/4:p", corpus.versification) - assert not rows[24].text + assert scripture_ref(rows[20]) == ScriptureRef.parse("MAT 2:0/3:s1", corpus.versification) + assert rows[20].text == "Chapter Two" - assert scripture_ref(rows[27]) == ScriptureRef.parse("MAT 2:1/1:f", corpus.versification) - assert rows[27].text == "This is a footnote." 
+ assert scripture_ref(rows[21]) == ScriptureRef.parse("MAT 2:0/4:p", corpus.versification) + assert not rows[21].text - assert scripture_ref(rows[30]) == ScriptureRef.parse("MAT 2:3/2:esb/1:ms", corpus.versification) - assert rows[30].text == "This is a sidebar" + assert scripture_ref(rows[26]) == ScriptureRef.parse("MAT 2:3/1:esb/1:ms", corpus.versification) + assert rows[26].text == "This is a sidebar" - assert scripture_ref(rows[31]) == ScriptureRef.parse("MAT 2:3/2:esb/2:p", corpus.versification) - assert rows[31].text == "Here is some sidebar content." + assert scripture_ref(rows[27]) == ScriptureRef.parse("MAT 2:3/1:esb/2:p", corpus.versification) + assert rows[27].text == "Here is some sidebar content." - assert scripture_ref(rows[37]) == ScriptureRef.parse("MAT 2:7a/1:s", corpus.versification) - assert rows[37].text == "Section header" + assert scripture_ref(rows[33]) == ScriptureRef.parse("MAT 2:7a/1:s", corpus.versification) + assert rows[33].text == "Section header" - assert scripture_ref(rows[44]) == ScriptureRef.parse("MAT 2:12/1:restore", corpus.versification) - assert rows[44].text == "restore information" + assert scripture_ref(rows[40]) == ScriptureRef.parse("MAT 2:12/1:restore", corpus.versification) + assert rows[40].text == "restore information" def test_get_rows_sentence_start() -> None: @@ -243,7 +231,7 @@ def test_get_rows_include_markers_all_text() -> None: assert scripture_ref(rows[23]) == ScriptureRef.parse("MAT 2:1", corpus.versification) assert rows[23].text == "Chapter \\add two\\add*, verse \\f + \\fr 2:1: \\ft This is a footnote.\\f*one." - assert scripture_ref(rows[27]) == ScriptureRef.parse("MAT 2:3/2:esb/2:p", corpus.versification) + assert scripture_ref(rows[27]) == ScriptureRef.parse("MAT 2:3/1:esb/2:p", corpus.versification) assert rows[27].text == "Here is some sidebar // content." 
diff --git a/tests/corpora/test_usfm_memory_text.py b/tests/corpora/test_usfm_memory_text.py index 46288b59..37b87563 100644 --- a/tests/corpora/test_usfm_memory_text.py +++ b/tests/corpora/test_usfm_memory_text.py @@ -111,11 +111,11 @@ def test_get_rows_verse_para_beginning_non_verse_segment() -> None: """, include_all_text=True, ) - assert len(rows) == 5, str.join(",", [tr.text for tr in rows]) + assert len(rows) == 4, str.join(",", [tr.text for tr in rows]) assert rows[0].text == "" assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/1:q1") - assert rows[1].text == "World" - assert scripture_ref(rows[1]) == ScriptureRef.parse("MAT 1:0/1:q1/1:f") + assert rows[1].text == "First verse in line!?!" + assert scripture_ref(rows[1]) == ScriptureRef.parse("MAT 1:1") def test_get_rows_verse_para_comment_first() -> None: @@ -129,11 +129,11 @@ def test_get_rows_verse_para_comment_first() -> None: """, include_all_text=True, ) - assert rows[0].text == "World" - assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/1:f") - assert rows[1].text == "This is a comment" - assert scripture_ref(rows[1]) == ScriptureRef.parse("MAT 1:0/2:ip") - assert len(rows) == 3, str.join(",", [tr.text for tr in rows]) + assert rows[0].text == "This is a comment" + assert scripture_ref(rows[0]) == ScriptureRef.parse("MAT 1:0/2:ip") + assert rows[1].text == "First verse in line!?!" + assert scripture_ref(rows[1]) == ScriptureRef.parse("MAT 1:1") + assert len(rows) == 2, str.join(",", [tr.text for tr in rows]) def get_rows(usfm: str, include_markers: bool = False, include_all_text: bool = False) -> List[TextRow]: