Skip to content

Commit ac7429d

Browse files
committed
Properly preserve specified markers (matching fix in machine)
1 parent 7cea3f5 commit ac7429d

File tree

2 files changed: +64 -13 lines changed

2 files changed: +64 -13 lines changed

machine/corpora/update_usfm_parser_handler.py

+16 -13
Original file line number | Diff line number | Diff line change
@@ -323,20 +323,21 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
323323
)
324324

325325
use_new_tokens = (
326-
(
326+
not self._is_in_preserved_paragraph(marker)
327+
and (
327328
self._text_behavior == UpdateUsfmTextBehavior.STRIP_EXISTING
328-
and not self._is_in_preserved_paragraph(marker)
329+
or (
330+
self._has_new_text()
331+
and (not existing_text or self._text_behavior != UpdateUsfmTextBehavior.PREFER_EXISTING)
332+
)
329333
)
330-
or (
331-
self._has_new_text()
332-
and (not existing_text or self._text_behavior != UpdateUsfmTextBehavior.PREFER_EXISTING)
333-
)
334-
) and (
335-
not in_embed
336-
or (
337-
self._is_in_note_text()
338-
and not in_nested_embed
339-
and self._embed_behavior == UpdateUsfmMarkerBehavior.PRESERVE
334+
and (
335+
not in_embed
336+
or (
337+
self._is_in_note_text()
338+
and not in_nested_embed
339+
and self._embed_behavior == UpdateUsfmMarkerBehavior.PRESERVE
340+
)
340341
)
341342
)
342343

@@ -346,7 +347,9 @@ def _replace_with_new_tokens(self, state: UsfmParserState, closed: bool = True)
346347
else:
347348
self._add_new_tokens()
348349

349-
if existing_text and self._text_behavior == UpdateUsfmTextBehavior.PREFER_EXISTING:
350+
if existing_text and (
351+
self._text_behavior == UpdateUsfmTextBehavior.PREFER_EXISTING or self._is_in_preserved_paragraph(marker)
352+
):
350353
if in_embed:
351354
self._clear_new_embed_tokens()
352355
else:

tests/corpora/test_update_usfm_parser_handler.py

+48
Original file line number | Diff line number | Diff line change
@@ -101,6 +101,54 @@ def test_get_usfm_strip_all_text() -> None:
101101
assess(target, result)
102102

103103

104+
def test_get_usfm_strip_paragraphs_preserve_paragraph_styles():
105+
rows = [
106+
(scr_ref("MAT 1:0/1:rem"), "New remark"),
107+
(scr_ref("MAT 1:0/3:ip"), "Another new remark"),
108+
(scr_ref("MAT 1:1"), "Update 1"),
109+
]
110+
usfm = r"""\id MAT
111+
\c 1
112+
\rem Update remark
113+
\r reference
114+
\ip This is another remark, but with a different marker
115+
\v 1 This is a verse
116+
"""
117+
118+
target = update_usfm(
119+
rows,
120+
usfm,
121+
text_behavior=UpdateUsfmTextBehavior.STRIP_EXISTING,
122+
paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP,
123+
)
124+
result = r"""\id MAT
125+
\c 1
126+
\rem Update remark
127+
\r reference
128+
\ip Another new remark
129+
\v 1 Update 1
130+
"""
131+
132+
assess(target, result)
133+
134+
targetDiffParagraph = update_usfm(
135+
rows,
136+
usfm,
137+
text_behavior=UpdateUsfmTextBehavior.STRIP_EXISTING,
138+
paragraph_behavior=UpdateUsfmMarkerBehavior.STRIP,
139+
preserve_paragraph_styles=["ip"],
140+
)
141+
resultDiffParagraph = r"""\id MAT
142+
\c 1
143+
\rem New remark
144+
\r
145+
\ip This is another remark, but with a different marker
146+
\v 1 Update 1
147+
"""
148+
149+
assess(targetDiffParagraph, resultDiffParagraph)
150+
151+
104152
def test_preserve_paragraphs():
105153
rows = [
106154
(

0 commit comments

Comments
 (0)