Skip to content

Scripture Update block #168

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 9 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ jobs:
node-version: "14"
- name: Lint with pyright
run: |
npm install -g pyright@1.1.386
npm install -g pyright@1.1.399
poetry run pyright
- name: Test with pytest
run: poetry run pytest --cov --cov-report=xml
Expand Down
3 changes: 3 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
"source.organizeImports": "explicit"
},
},
"files.associations": {
"*.SFM": "usfm",
},
"black-formatter.path": [
"poetry",
"run",
Expand Down
6 changes: 3 additions & 3 deletions machine/corpora/paratext_project_terms_parser_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ def parse(self, term_categories: Sequence[str], use_term_glosses: bool = True) -
else:
term_id_to_category_dict = {}

terms_glosses_doc: Optional[ElementTree.ElementTree] = None
terms_glosses_doc: Optional[ElementTree.ElementTree[ElementTree.Element]] = None
resource_name = None
if self._settings.language_code is not None:
resource_name = _SUPPORTED_LANGUAGE_TERMS_LOCALIZATION_XMLS.get(self._settings.language_code)
Expand All @@ -57,7 +57,7 @@ def parse(self, term_categories: Sequence[str], use_term_glosses: bool = True) -
with open_binary(_SUPPORTED_LANGUAGE_TERMS_LOCALIZATION_XMLS_PACKAGE, resource_name) as stream:
terms_glosses_doc = ElementTree.parse(stream)

term_renderings_doc: Optional[ElementTree.ElementTree] = None
term_renderings_doc: Optional[ElementTree.ElementTree[ElementTree.Element]] = None
if self._exists("TermRenderings.xml"):
with self._open("TermRenderings.xml") as stream:
term_renderings_doc = ElementTree.parse(stream)
Expand Down Expand Up @@ -136,7 +136,7 @@ def _strip_parens(term_string: str, left: str = "(", right: str = ")") -> str:
return term_string


def _get_category_per_id(biblical_terms_doc: ElementTree.ElementTree) -> Dict[str, str]:
def _get_category_per_id(biblical_terms_doc: ElementTree.ElementTree[ElementTree.Element]) -> Dict[str, str]:
term_id_to_category_dict: Dict[str, str] = {}

for term in biblical_terms_doc.findall(".//Term"):
Expand Down
3 changes: 3 additions & 0 deletions machine/corpora/paratext_project_text_updater_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from .paratext_project_settings import ParatextProjectSettings
from .paratext_project_settings_parser_base import ParatextProjectSettingsParserBase
from .scripture_ref import ScriptureRef
from .scripture_update_block_handler import ScriptureUpdateBlockHandler
from .update_usfm_parser_handler import UpdateUsfmMarkerBehavior, UpdateUsfmParserHandler, UpdateUsfmTextBehavior
from .usfm_parser import parse_usfm

Expand All @@ -26,6 +27,7 @@ def update_usfm(
embed_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.PRESERVE,
style_behavior: UpdateUsfmMarkerBehavior = UpdateUsfmMarkerBehavior.STRIP,
preserve_paragraph_styles: Optional[Sequence[str]] = None,
update_block_handlers: Optional[list[ScriptureUpdateBlockHandler]] = None,
) -> Optional[str]:
file_name: str = self._settings.get_book_file_name(book_id)
if not self._exists(file_name):
Expand All @@ -40,6 +42,7 @@ def update_usfm(
embed_behavior,
style_behavior,
preserve_paragraph_styles,
update_block_handlers=update_block_handlers,
)
try:
parse_usfm(usfm, handler, self._settings.stylesheet, self._settings.versification)
Expand Down
49 changes: 49 additions & 0 deletions machine/corpora/scripture_update_block.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
from __future__ import annotations

from .scripture_ref import ScriptureRef
from .scripture_update_element import ScriptureUpdateElement, ScriptureUpdateElementType
from .usfm_token import UsfmToken, UsfmTokenType


class ScriptureUpdateBlock:
    """An ordered list of ScriptureUpdateElement entries paired with a ScriptureRef.

    Elements are appended through the ``add_*`` methods and read back either
    as elements (``elements``) or flattened into tokens (``get_tokens``).
    """

    def __init__(self) -> None:
        """Start with a default-constructed reference and no elements."""
        self._ref: ScriptureRef = ScriptureRef()
        self._elements: list[ScriptureUpdateElement] = []

    @property
    def elements(self) -> list[ScriptureUpdateElement]:
        """The live internal element list (not a copy)."""
        return self._elements

    def add_existing_text(self, token: UsfmToken, marked_for_removal: bool = False) -> None:
        """Append a single-token EXISTING_TEXT element."""
        element = ScriptureUpdateElement(ScriptureUpdateElementType.EXISTING_TEXT, [token], marked_for_removal)
        self._elements.append(element)

    def add_inserted_text(self, tokens: list[UsfmToken]) -> None:
        """Append an INSERTED_TEXT element holding a shallow copy of ``tokens``."""
        element = ScriptureUpdateElement(ScriptureUpdateElementType.INSERTED_TEXT, tokens.copy())
        self._elements.append(element)

    def add_token(self, token: UsfmToken, marked_for_removal: bool = False) -> None:
        """Append a single-token element typed by the token's own type.

        TEXT tokens are recorded as EXISTING_TEXT; every other token type is
        recorded as OTHER.
        """
        element_type = (
            ScriptureUpdateElementType.EXISTING_TEXT
            if token.type == UsfmTokenType.TEXT
            else ScriptureUpdateElementType.OTHER
        )
        self._elements.append(ScriptureUpdateElement(element_type, [token], marked_for_removal))

    def add_tokens(self, tokens: list[UsfmToken], marked_for_removal: bool = False) -> None:
        """Append one OTHER element holding a shallow copy of ``tokens``.

        An empty ``tokens`` list is ignored, so no empty element is stored.
        """
        if len(tokens) == 0:
            # Nothing to record for an empty run of tokens.
            return
        element = ScriptureUpdateElement(ScriptureUpdateElementType.OTHER, tokens.copy(), marked_for_removal)
        self._elements.append(element)

    def update_ref(self, ref: ScriptureRef) -> None:
        """Replace the stored reference with ``ref``."""
        self._ref = ref

    def clear(self) -> None:
        """Empty the element list in place and reset the reference to ``ScriptureRef()``."""
        self._elements.clear()
        self._ref = ScriptureRef()

    def get_tokens(self) -> list[UsfmToken]:
        """Return a new flat list of the tokens each element reports, in element order."""
        flattened: list[UsfmToken] = []
        for element in self._elements:
            flattened.extend(element.get_tokens())
        return flattened
11 changes: 11 additions & 0 deletions machine/corpora/scripture_update_block_handler.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
from __future__ import annotations

from abc import ABC

from .scripture_update_block import ScriptureUpdateBlock


class ScriptureUpdateBlockHandler(ABC):
    """Base class for objects that process a ScriptureUpdateBlock.

    The base implementation of ``process_block`` always raises, so subclasses
    are expected to override it.
    """

    def process_block(self, block: ScriptureUpdateBlock) -> ScriptureUpdateBlock:
        """Process ``block`` and return a ScriptureUpdateBlock.

        Raises:
            NotImplementedError: always, in this base class.
        """
        message = "Must be implemented in subclass"
        raise NotImplementedError(message)
24 changes: 24 additions & 0 deletions machine/corpora/scripture_update_element.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
from __future__ import annotations

from dataclasses import dataclass
from enum import Enum, auto

from .usfm_token import UsfmToken


class ScriptureUpdateElementType(Enum):
    """Tag identifying what kind of content a ScriptureUpdateElement carries."""

    # Values come from auto(), so declaration order fixes them.
    EXISTING_TEXT = auto()  # text taken from an existing token
    INSERTED_TEXT = auto()  # text tokens that were inserted
    OTHER = auto()  # anything else, such as non-text tokens


@dataclass
class ScriptureUpdateElement:
    """A typed group of tokens that can be flagged for removal."""

    # Kind of content this element carries.
    type: ScriptureUpdateElementType
    # Tokens belonging to this element.
    tokens: list[UsfmToken]
    # When True, get_tokens() reports no tokens for this element.
    marked_for_removal: bool = False

    def get_tokens(self) -> list[UsfmToken]:
        """Return the element's tokens, or an empty list if it is marked for removal.

        When not marked for removal, the stored list itself is returned (not a copy).
        """
        return [] if self.marked_for_removal else self.tokens
Loading
Loading