-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathsegmentizer.py
150 lines (111 loc) · 4.99 KB
/
segmentizer.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
from segment import Segment
def segmentize(gentle_outputs, audio_file,
               anchor_length):
    """
    Partition Gentle's word list into an ordered list of Segments.

    The words are scanned left to right. A run of consecutive words for
    which ``word.success()`` is true and whose length is at least
    ``anchor_length`` becomes an anchor Segment. The words lying between
    two anchors (or before the first / after the last anchor) are grouped
    into non-anchor Segments, which the original author calls
    "recursive points".

    Parameters
    -----------------
    gentle_outputs: the direct output of Gentle (sequence of Word objects
        exposing ``success()``)
    audio_file: audio object handed unchanged to every Segment
        (a pydub object according to the original documentation)
    anchor_length: minimum number of consecutive aligned words for a run
        to count as an anchor

    Returns
    -----------------
    List of the objects produced by ``get_segment``, in transcript order.
    An empty ``gentle_outputs`` yields an empty list.
    """
    total_gentle_len = len(gentle_outputs)
    segs = []

    # Nothing to scan, nothing to flush.
    if total_gentle_len == 0:
        return segs

    last_index = total_gentle_len - 1

    run_length = 0      # number of aligned words in the current run
    run_start = None    # index of the first word of the current run
    pending_start = 0   # index of the first word not yet put in a Segment

    def emit(words, is_anchor):
        # Build one Segment from a slice of words and record it.
        segs.append(get_segment(words, is_anchor, audio_file,
                                total_gentle_len))

    for index, word in enumerate(gentle_outputs):
        if word.success():
            # Aligned word: extend the current run, or open a new one.
            run_length += 1
            if run_start is None:
                run_start = index
        elif run_length >= anchor_length:
            # An unaligned word closes a run long enough to be an anchor.
            # Emit the unanchored words in front of it first; that gap is
            # empty when the anchor starts right where the last one ended
            # (e.g. the input begins with an anchor).
            if pending_start != run_start:
                emit(gentle_outputs[pending_start:run_start], False)
            emit(gentle_outputs[run_start:index], True)
            # The unaligned word itself starts the next pending stretch.
            pending_start = index
            run_length = 0
            run_start = None
        elif index < last_index:
            # Run too short to be an anchor: forget it. On the very last
            # word the counters are left alone; the tail handling below
            # emits everything from pending_start onwards either way.
            run_length = 0
            run_start = None

    # End of input: flush whatever has not been turned into a Segment yet.
    if run_length >= anchor_length:
        # The input ends inside an anchor run.
        if pending_start != run_start:
            emit(gentle_outputs[pending_start:run_start], False)
        emit(gentle_outputs[run_start:], True)
    else:
        # The remaining words do not qualify as an anchor.
        emit(gentle_outputs[pending_start:], False)

    return segs
def get_segment(bounded_gentle_output, is_anchor, audio_file, total_gentle_len):
    """
    Wrap a bounded portion of Gentle output in a Segment.

    The Segment's start is the ``start`` attribute of the first word and
    its finish is the ``end`` attribute of the last word in the slice.

    Parameters
    --------------
    bounded_gentle_output: portion of gentle output within the Segment;
        must be non-empty, since its first and last items are indexed
    is_anchor - bool: whether the segment is an anchor point or not
    audio_file: audio object forwarded unchanged to Segment
        (a pydub object according to the original documentation)
    total_gentle_len: length of the gentle output passed into segmentize,
        forwarded unchanged to Segment

    Returns
    --------------
    The newly constructed Segment.
    """
    first_word = bounded_gentle_output[0]
    last_word = bounded_gentle_output[-1]

    return Segment(first_word.start, last_word.end,
                   bounded_gentle_output, is_anchor, audio_file,
                   total_gentle_len)