11
11
from .verse import Verse
12
12
13
13
14
class QuotationMarkCounter:
    """Tally quotation marks and report which ones are too rare to matter.

    A mark is considered negligible when its share of all counted marks is
    strictly below ``_NEGLIGIBLE_PROPORTION_THRESHOLD``.
    """

    # Fraction of the total below which a mark's proportion is negligible.
    _NEGLIGIBLE_PROPORTION_THRESHOLD = 0.01

    def __init__(self):
        self.reset()

    def reset(self) -> None:
        """Discard every count accumulated so far."""
        self._quotation_mark_counts: Dict[str, int] = defaultdict(int)
        self._total_quotation_mark_count: int = 0

    def count_quotation_marks(self, quotation_marks: List[QuotationMarkStringMatch]) -> None:
        """Record one occurrence for each match in ``quotation_marks``.

        The ``quotation_mark`` attribute of each match is used as the key.
        """
        for quotation_mark_match in quotation_marks:
            mark: str = quotation_mark_match.quotation_mark
            self._quotation_mark_counts[mark] += 1
            self._total_quotation_mark_count += 1

    def is_quotation_mark_proportion_negligible(self, quotation_mark: str) -> bool:
        """Return True if ``quotation_mark`` is a negligible share of all marks.

        When nothing has been counted yet, every mark is reported as
        negligible.
        """
        if self._total_quotation_mark_count == 0:
            return True
        # Use .get() rather than indexing: indexing a defaultdict inserts a
        # zero entry for unseen marks, so a query would mutate the tally.
        occurrences = self._quotation_mark_counts.get(quotation_mark, 0)
        proportion = occurrences / self._total_quotation_mark_count
        return proportion < self._NEGLIGIBLE_PROPORTION_THRESHOLD
37
+
38
+
14
39
class ApostropheProportionStatistics :
15
40
def __init__ (self ):
16
41
self .reset ()
@@ -260,11 +285,13 @@ def __init__(self, quote_conventions: QuoteConventionSet):
260
285
self ._quote_conventions = quote_conventions
261
286
self ._apostrophe_analyzer = PreliminaryApostropheAnalyzer ()
262
287
self ._quotation_mark_sequences = QuotationMarkSequences ()
288
+ self ._quotation_mark_counts = QuotationMarkCounter ()
263
289
self .reset ()
264
290
265
291
def reset(self) -> None:
    """Call ``reset()`` on each helper object held by this instance."""
    helpers = (
        self._apostrophe_analyzer,
        self._quotation_mark_sequences,
        self._quotation_mark_counts,
    )
    # Same order as before: apostrophe analyzer, sequences, then counts.
    for helper in helpers:
        helper.reset()
268
295
269
296
def narrow_down_possible_quote_conventions (self , chapters : List [Chapter ]) -> QuoteConventionSet :
270
297
for chapter in chapters :
@@ -281,6 +308,7 @@ def _analyze_quotation_marks_for_verse(self, verse: Verse) -> None:
281
308
).find_all_potential_quotation_marks_in_verse (verse )
282
309
self ._analyze_quotation_mark_sequence (quotation_marks )
283
310
self ._apostrophe_analyzer .process_quotation_marks (verse .text_segments , quotation_marks )
311
+ self ._quotation_mark_counts .count_quotation_marks (quotation_marks )
284
312
285
313
def _analyze_quotation_mark_sequence (self , quotation_marks : List [QuotationMarkStringMatch ]) -> None :
286
314
quotation_mark_grouper : QuotationMarkGrouper = QuotationMarkGrouper (quotation_marks , self ._quote_conventions )
@@ -304,6 +332,8 @@ def _find_opening_quotation_marks(self) -> List[str]:
304
332
]
305
333
306
334
def _is_opening_quotation_mark (self , quotation_mark : str ) -> bool :
335
+ if self ._quotation_mark_counts .is_quotation_mark_proportion_negligible (quotation_mark ):
336
+ return False
307
337
if self ._apostrophe_analyzer .is_apostrophe_only (quotation_mark ):
308
338
return False
309
339
@@ -323,6 +353,8 @@ def _find_closing_quotation_marks(self) -> List[str]:
323
353
]
324
354
325
355
def _is_closing_quotation_mark (self , quotation_mark : str ) -> bool :
356
+ if self ._quotation_mark_counts .is_quotation_mark_proportion_negligible (quotation_mark ):
357
+ return False
326
358
if self ._apostrophe_analyzer .is_apostrophe_only (quotation_mark ):
327
359
return False
328
360
0 commit comments