Skip to content

Commit

Permalink
Added audio splitting command-line parameters as requested in issue eli…
Browse files Browse the repository at this point in the history
  • Loading branch information
michael committed Feb 6, 2024
1 parent 98c708d commit 3953599
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 9 deletions.
14 changes: 7 additions & 7 deletions audiosplitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def split_all_audios():
split_audio(wav, output_path)

# iterate through files in wavs, for each file get the length. if it's more than `seconds` seconds (default 12), split it into chunks of about that length, cutting at silences of at least `min_silence_len` ms (default 300)
def split_long_audios():
def split_long_audios(min_silence_len = 300,silence_thresh = -60, keep_silence=300, seconds = 12):
# if wavs_split_temp and wavs_split_final don't exist, create them
if not os.path.exists('./wavs_split_temp'):
os.makedirs('./wavs_split_temp')
Expand All @@ -37,10 +37,10 @@ def split_long_audios():
# get the length of the audio
audio = AudioSegment.from_wav(wav)
length = round(audio.duration_seconds, 2)
# if the length is more than 12 seconds, split it into 12 second chunks
if length > 12:
# if the length is more than `seconds`, split it into chunks of roughly `seconds` seconds each
if length > seconds:

chunks = split_on_silence(audio, min_silence_len=300, silence_thresh=-60, keep_silence=300)
chunks = split_on_silence(audio, min_silence_len=min_silence_len, silence_thresh=silence_thresh, keep_silence=keep_silence)
current_length = 0
current_split = 0
# out_data is an empty AudioSegment
Expand All @@ -50,7 +50,7 @@ def split_long_audios():
# also write if we're at the end of the file
current_length += round(chunk.duration_seconds, 2)
print(filename)
if current_length > 12 or (i == len(chunks) - 1 and len(chunks) > 1):
if current_length > seconds or (i == len(chunks) - 1 and len(chunks) > 1):
# export the chunk
out_file = './wavs_split_temp/' + filename + '_split' + str(current_split) + '.wav'
print("exporting", out_file)
Expand Down Expand Up @@ -90,7 +90,7 @@ def filter_short_audios():
print('omitting', wav)

if __name__ == "__main__":
split_all_audios();
split_long_audios();
# split_all_audios();
split_long_audios(min_silence_len = 300 ,silence_thresh = -60, keep_silence=300, seconds = 12);
filter_short_audios();
print('Final audio files are in ./wavs_split_final')
9 changes: 7 additions & 2 deletions pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,13 @@ def main():
description='chops up wav files and adds transcription, outputs in LJ Speech format')
parser = argparse.ArgumentParser()
parser.add_argument('-p', '--provider', help='Set transcription provider (google or whisper) default is whisper', default="whisper")
parser.add_argument('-k', '--speech_key', help='Google Speech API Key')
parser.add_argument('-a', '--speech_key', help='Google Speech API Key')
parser.add_argument('-m', '--model', help='Open AI Whisper model (tiny, base, small, medium, large, large-v2, or large-v3) to use, default large-v3', default="large-v3")
parser.add_argument('-s', '--seconds', help='Set the number seconds per wav file.', default=12)
parser.add_argument('-l', '--min_silence_len', help='Set the min_silence_len for audio segmenting', default=300)
parser.add_argument('-t', '--silence_thresh', help='Set silence_thresh for audio segmenting', default=-60)
parser.add_argument('-k', '--keep_silence', help='Set keep_silence for audio segmenting', default=300)

args = parser.parse_args()

if args.provider == "google" and args.speech_key == None:
Expand All @@ -48,7 +53,7 @@ def main():
# split_all_audios();

# simply split and move the clips
split_long_audios();
split_long_audios(min_silence_len = args.min_silence_len ,silence_thresh = args.silence_thresh, keep_silence=args.keep_silence, seconds = args.seconds);
filter_short_audios();

# 2. transcribe audio files with transcriber
Expand Down

0 comments on commit 3953599

Please sign in to comment.