5
5
from ..scripture .verse_ref import VerseRef , are_overlapping_verse_ranges
6
6
from .corpora_utils import merge_verse_ranges
7
7
from .scripture_element import ScriptureElement
8
- from .scripture_embed import EMBED_PART_START_CHAR_STYLES , is_embed_part_style , is_embed_style , is_note_text
9
8
from .scripture_ref import ScriptureRef
10
9
from .usfm_parser_handler import UsfmParserHandler
11
10
from .usfm_parser_state import UsfmParserState
@@ -16,7 +15,14 @@ class ScriptureTextType(Enum):
16
15
NONE = auto ()
17
16
NONVERSE = auto ()
18
17
VERSE = auto ()
19
- NOTE_TEXT = auto ()
18
+ EMBED = auto ()
19
+
20
+
21
_EMBED_STYLES = {"f", "fe", "x", "fig"}


def _is_embed_style(marker: Optional[str]) -> bool:
    """Return True when ``marker`` names an embed style.

    A marker counts as an embed when, after removing any ``*`` characters
    from both ends, it is one of ``_EMBED_STYLES``, or when the marker as
    given starts with ``z``. ``None`` is never an embed style.
    """
    if marker is None:
        return False
    if marker.startswith("z"):
        return True
    return marker.strip("*") in _EMBED_STYLES
20
26
21
27
22
28
class ScriptureRefUsfmParserHandler (UsfmParserHandler , ABC ):
@@ -25,18 +31,11 @@ def __init__(self) -> None:
25
31
self ._cur_elements_stack : List [ScriptureElement ] = []
26
32
self ._cur_text_type_stack : List [ScriptureTextType ] = []
27
33
self ._duplicate_verse : bool = False
28
- self ._in_preserved_paragraph : bool = False
29
- self ._in_embed : bool = False
30
- self ._in_note_text : bool = False
31
- self ._in_nested_embed : bool = False
32
34
33
35
@property
def _current_text_type(self) -> ScriptureTextType:
    """Text type on top of the text-type stack, or ``NONE`` when the stack is empty."""
    if not self._cur_text_type_stack:
        return ScriptureTextType.NONE
    return self._cur_text_type_stack[-1]
36
38
37
- def _is_in_note_text (self ) -> bool :
38
- return self ._in_note_text
39
-
40
39
def end_usfm (self , state : UsfmParserState ) -> None :
41
40
self ._end_verse_text_wrapper (state )
42
41
@@ -112,32 +111,6 @@ def start_sidebar(self, state: UsfmParserState, marker: str, category: str) -> N
112
111
def end_sidebar(self, state: UsfmParserState, marker: str, closed: bool) -> None:
    """Handle the end of a sidebar by delegating to ``_end_parent_element``.

    ``state``, ``marker`` and ``closed`` are accepted for the handler
    interface but are not used here.
    """
    self._end_parent_element()
114
113
115
- def start_note (self , state : UsfmParserState , marker : str , caller : str , category : Optional [str ]) -> None :
116
- self ._in_embed = True
117
- self ._start_embed_wrapper (state , marker )
118
-
119
- def end_note (self , state : UsfmParserState , marker : str , closed : bool ) -> None :
120
- self ._end_note_text_wrapper (state )
121
- self ._end_embed (state , marker , None , closed )
122
- self ._in_embed = False
123
-
124
- def _start_embed_wrapper (self , state : UsfmParserState , marker : str ) -> None :
125
- if self ._cur_verse_ref .is_default :
126
- self ._update_verse_ref (state .verse_ref , marker )
127
-
128
- if not self ._duplicate_verse :
129
- self ._check_convert_verse_para_to_non_verse (state )
130
- self ._next_element (marker )
131
-
132
- self ._start_embed (state , self ._create_non_verse_ref ())
133
-
134
- def _start_embed (self , state : UsfmParserState , scripture_ref : ScriptureRef ) -> None : ...
135
-
136
- def _end_embed (
137
- self , state : UsfmParserState , marker : str , attributes : Optional [Sequence [UsfmAttribute ]], closed : bool
138
- ) -> None :
139
- pass
140
-
141
114
def text (self , state : UsfmParserState , text : str ) -> None :
142
115
# if we hit text in a verse paragraph and we aren't in a verse, then start a non-verse segment
143
116
if text .strip ():
@@ -149,29 +122,23 @@ def opt_break(self, state: UsfmParserState) -> None:
149
122
def start_char(
    self, state: UsfmParserState, marker: str, unknown: bool, attributes: Optional[Sequence[UsfmAttribute]]
) -> None:
    """Handle the start of a character-style marker.

    Always runs the verse-paragraph-to-non-verse check first; when the
    marker is an embed style, an embed text segment is then started.
    ``unknown`` and ``attributes`` are not used here.
    """
    # A character marker inside a verse paragraph while we are not in a verse
    # means a non-verse segment has to be started.
    self._check_convert_verse_para_to_non_verse(state)

    if not _is_embed_style(marker):
        return
    self._start_embed_text_wrapper(state, marker)
163
130
164
131
def end_char(
    self, state: UsfmParserState, marker: str, attributes: Optional[Sequence[UsfmAttribute]], closed: bool
) -> None:
    """Handle the end of a character-style marker.

    Only embed-style markers have any effect: they close the current embed
    text segment. ``attributes`` and ``closed`` are not used here.
    """
    if not _is_embed_style(marker):
        return
    self._end_embed_text_wrapper(state)
136
+
137
def start_note(self, state: UsfmParserState, marker: str, caller: str, category: Optional[str]) -> None:
    """Handle the start of a note by opening an embed text segment.

    Args:
        state: Current parser state, forwarded to the embed wrapper.
        marker: The note marker, forwarded to the embed wrapper.
        caller: Note caller; not used here.
        category: Optional note category; not used here.
    """
    self._start_embed_text_wrapper(state, marker)
139
+
140
def end_note(self, state: UsfmParserState, marker: str, closed: bool) -> None:
    """Handle the end of a note by closing the current embed text segment.

    Args:
        state: Current parser state, forwarded to the embed wrapper.
        marker: The note marker; not used here.
        closed: Whether the note was explicitly closed; not used here.
    """
    self._end_embed_text_wrapper(state)
175
142
176
143
def _start_verse_text(self, state: UsfmParserState, scripture_refs: Optional[Sequence[ScriptureRef]]) -> None:
    """Hook for the start of verse text; does nothing in this class.

    NOTE(review): presumably overridden by concrete handlers - confirm against subclasses.
    """
177
144
@@ -181,20 +148,9 @@ def _start_non_verse_text(self, state: UsfmParserState, scripture_ref: Scripture
181
148
182
149
def _end_non_verse_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Hook for the end of non-verse text; does nothing in this class.

    NOTE(review): presumably overridden by concrete handlers - confirm against subclasses.
    """
183
150
184
- def _start_note_text_wrapper (self , state : UsfmParserState ):
185
- self ._in_note_text = True
186
- self ._cur_text_type_stack .append (ScriptureTextType .NOTE_TEXT )
187
- self ._start_note_text (state )
188
-
189
- def _start_note_text (self , state : UsfmParserState ) -> None : ...
190
-
191
- def _end_note_text_wrapper (self , state : UsfmParserState ):
192
- if self ._cur_text_type_stack and self ._cur_text_type_stack [- 1 ] == ScriptureTextType .NOTE_TEXT :
193
- self ._end_note_text (state , self ._create_non_verse_ref ())
194
- self ._cur_text_type_stack .pop ()
195
- self ._in_note_text = False
151
def _start_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Hook called by ``_start_embed_text_wrapper`` when an embed text segment opens.

    Receives the parser state and the non-verse reference created for the
    embed. Does nothing in this class.
    """
196
152
197
- def _end_note_text (self , state : UsfmParserState , scripture_ref : ScriptureRef ) -> None : ...
153
def _end_embed_text(self, state: UsfmParserState, scripture_ref: ScriptureRef) -> None:
    """Hook called by ``_end_embed_text_wrapper`` when an embed text segment closes.

    Receives the parser state and the non-verse reference created for the
    embed. Does nothing in this class.
    """
198
154
199
155
def _start_verse_text_wrapper (self , state : UsfmParserState ) -> None :
200
156
self ._duplicate_verse = False
@@ -222,6 +178,25 @@ def _update_verse_ref(self, verse_ref: VerseRef, marker: str) -> None:
222
178
self ._cur_elements_stack .append (ScriptureElement (0 , marker ))
223
179
self ._cur_verse_ref = verse_ref .copy ()
224
180
181
def _start_embed_text_wrapper(self, state: UsfmParserState, marker: str) -> None:
    """Open an embed text segment for ``marker``.

    If the current verse reference is still the default one, it is first
    updated from ``state.verse_ref``. Nothing further happens while a
    duplicate verse is being processed; otherwise the element position is
    advanced, ``EMBED`` is pushed on the text-type stack and the
    ``_start_embed_text`` hook is invoked with a fresh non-verse reference.
    """
    if self._cur_verse_ref.is_default:
        self._update_verse_ref(state.verse_ref, marker)

    if self._duplicate_verse:
        return

    self._check_convert_verse_para_to_non_verse(state)
    self._next_element(marker)
    self._cur_text_type_stack.append(ScriptureTextType.EMBED)
    embed_ref = self._create_non_verse_ref()
    self._start_embed_text(state, embed_ref)
190
+
191
def _end_embed_text_wrapper(self, state: UsfmParserState) -> None:
    """Close the current embed text segment, if one is open.

    Does nothing while a duplicate verse is being processed, or when the top
    of the text-type stack is not ``EMBED``. Otherwise the ``_end_embed_text``
    hook is invoked with a non-verse reference and the ``EMBED`` entry is
    popped from the stack.
    """
    if self._duplicate_verse:
        return
    if not self._cur_text_type_stack:
        return
    if self._cur_text_type_stack[-1] != ScriptureTextType.EMBED:
        return

    embed_ref = self._create_non_verse_ref()
    self._end_embed_text(state, embed_ref)
    self._cur_text_type_stack.pop()
199
+
225
200
def _next_element (self , marker : str ) -> None :
226
201
prev_elem : ScriptureElement = self ._cur_elements_stack .pop ()
227
202
self ._cur_elements_stack .append (ScriptureElement (prev_elem .position + 1 , marker ))
@@ -234,7 +209,7 @@ def _end_parent_element(self) -> None:
234
209
self ._cur_elements_stack .pop ()
235
210
236
211
def _end_embed_elements(self) -> None:
    """Pop the top element from the element stack when it is an embed-style element."""
    elements = self._cur_elements_stack
    if not elements:
        return
    if _is_embed_style(elements[-1].name):
        elements.pop()
239
214
240
215
def _create_verse_refs (self ) -> List [ScriptureRef ]:
@@ -263,14 +238,3 @@ def _check_convert_verse_para_to_non_verse(self, state: UsfmParserState) -> None
263
238
):
264
239
self ._start_parent_element (para_tag .marker )
265
240
self ._start_non_verse_text_wrapper (state )
266
-
267
- def _is_in_embed (self , marker : Optional [str ]) -> bool :
268
- return self ._in_embed or is_embed_style (marker )
269
-
270
- def _is_in_nested_embed (self , marker : Optional [str ]) -> bool :
271
- return self ._in_nested_embed or (
272
- marker is not None
273
- and marker .startswith ("+" )
274
- and marker [1 ] in EMBED_PART_START_CHAR_STYLES
275
- and marker != "fm"
276
- )
0 commit comments