forked from elizaOS/LJSpeechTools
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtranscriber.py
68 lines (53 loc) · 2.52 KB
/
transcriber.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import glob
import speech_recognition as sr
from os import path, rename
from whisper_transcribe import Whisper_Transcriber
def transcribe(provider = "google", google_speech_api_key = None, model_name="large-v3"):
    """Transcribe every WAV file in ./wavs/ and write metadata.csv.

    Each successfully transcribed file contributes a "path|transcription"
    line to metadata.csv. Files that fail are moved to ./ignore/ so reruns
    skip them, and are recorded with an "<ERROR>" placeholder line.

    Args:
        provider: Transcription backend, "google" or "whisper".
        google_speech_api_key: API key; required when provider == "google".
        model_name: Whisper model name (tiny, base, small, medium, large,
            large-v2, large-v3). Only used when provider == "whisper".

    Returns:
        None. Side effects: writes metadata.csv; may move failed wav files.
    """
    r = None
    whisper_transcriber = None
    if provider == "google":
        # `is None` rather than `== None` (identity check for the sentinel)
        if google_speech_api_key is None:
            print("Error: Google speech api key is required to use google transcription option")
            return
        r = sr.Recognizer()
    elif provider == "whisper":
        whisper_transcriber = Whisper_Transcriber()
        whisper_transcriber.init(model_name)
    else:
        print('Unknown transcription source')
        return
    wav_files = glob.glob('./wavs/*.wav')
    metadata = []
    # for each wav file
    for fpath in wav_files:
        print()
        transcription = ''
        try:
            print(f"Transcribing : {fpath}")
            if provider == "google":
                with sr.AudioFile(fpath) as source:
                    # BUG FIX: Recognizer.record() takes the AudioSource
                    # itself, not its filename_or_fileobject attribute.
                    audio = r.record(source)  # read the entire audio file
                    transcription = r.recognize_google(audio, key=google_speech_api_key)
            elif provider == "whisper":
                transcription = whisper_transcriber.transcribe(audio_file_name=fpath)
            print(" " + transcription)
            metadata.append(fpath + "|" + transcription)
        except Exception as error:
            # BUG FIX: str + Exception raises TypeError; convert explicitly
            # so the handler itself cannot crash the run.
            print('Skipping ' + fpath + ' : An Error occurred : ' + str(error))
            metadata.append(fpath + "|" + "<ERROR>")
            new_fpath = './ignore/' + path.basename(fpath)
            # now move the file to the skipped directory
            rename(fpath, new_fpath)
            continue
    metadata_txt = '\n'.join(metadata)
    with open("metadata.csv", "w") as text_file:
        text_file.write(metadata_txt)
if __name__ == "__main__":
    # Use OpenAI Whisper with the large-v3 model: slow, but best results.
    transcribe(provider="whisper")
    # To use a smaller/faster Whisper model, pass model_name explicitly.
    # Valid values: tiny, base, small, medium, large, large-v2, or large-v3
    # transcribe(provider="whisper", model_name = "tiny")
    # Use the Google Speech API. Previous versions of the LJ Speech Toolset
    # relied on a key embedded in the speech_recognition library; that key
    # has been revoked, so you will need to provide your own API key.
    # transcribe(provider="google", google_speech_api_key="[PLACE YOUR GOOGLE SPEECH KEY HERE]")