Skip to content

Commit

Permalink
Fix edge case where a segment can't be further segmented.
Browse files Browse the repository at this point in the history
  • Loading branch information
zh-plus committed Jun 17, 2024
1 parent 988fd07 commit fb292c6
Showing 1 changed file with 7 additions and 0 deletions.
7 changes: 7 additions & 0 deletions openlrc/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,10 @@ def is_punct(char):
former_words = seg_entry.words[:len(seg_entry.words) // 2]
latter_words = seg_entry.words[len(seg_entry.words) // 2:]

if not former_words or not latter_words:
logger.warning(f'Empty former_words or latter_words: {former_words} or {latter_words}, skip')
return [seg_entry]

former = seg_from_words(seg_entry, seg_entry.id, former_words, seg_entry.tokens[:len(former_words)])
latter = seg_from_words(seg_entry, seg_entry.id + 1, latter_words, seg_entry.tokens[len(former_words):])

Expand Down Expand Up @@ -177,6 +181,9 @@ def recursive_segment(entry):
if entry.end - entry.start > 10:
# split if duration > 10s
segmented_entries = mid_split(entry)
if len(segmented_entries) == 1: # if cant be further segmented
return [entry]

further_segmented = []
for segment in segmented_entries:
further_segmented.extend(recursive_segment(segment))
Expand Down

0 comments on commit fb292c6

Please sign in to comment.