diff --git a/examples/transcribe.py b/examples/transcribe.py index 5723e96..e7e4da6 100644 --- a/examples/transcribe.py +++ b/examples/transcribe.py @@ -7,7 +7,7 @@ log_folder = "logs" # log folder for storing transcripts modelSize = "tiny" # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3] quantization = False # setting this 'True' may speed up the process but lower the accuracy -ACCESS_TOKEN = "huggingface api key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface +ACCESS_TOKEN = "huggingface access token" # get permission to access pyannote/speaker-diarization@2.1 on huggingface # quantization only works on faster-whisper transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization) @@ -16,13 +16,13 @@ res = transcriptor.whisper() # use faster-whisper (simply faster) -res = transcriptor.faster_whisper() +#res = transcriptor.faster_whisper() # use a custom trained whisper model -res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt") +#res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt") # use a huggingface whisper model -res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base") +#res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base") # use assembly ai model -res = transcriptor.assemby_ai_model("assemblyAI api key") +#res = transcriptor.assemby_ai_model("assemblyAI api key") diff --git a/requirements.txt b/requirements.txt index 476b184..47eaa7e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,9 +1,9 @@ -transformers>=4.36.2, <5.0.0 -torch>=2.1.2, <3.0.0 -torchaudio>=2.1.2, <3.0.0 -pydub>=0.25.1, <1.0.0 -pyannote.audio>=3.1.1, <4.0.0 -speechbrain>=0.5.16, <1.0.0 -accelerate>=0.26.1, <1.0.0 -faster-whisper>=0.10.1, <1.0.0 -openai-whisper>=20231117, <20240927 +transformers +torch +torchaudio +pydub +pyannote.audio +speechbrain +accelerate +faster-whisper +openai-whisper diff --git a/setup.py b/setup.py index c8bb6df..a6061fb 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setup( name="speechlib", - version="1.1.10", + version="1.1.11", description="speechlib is a library that can do speaker diarization, transcription and speaker recognition on an audio file to create transcripts with actual speaker names. This library also contain audio preprocessor functions.", packages=find_packages(), long_description=long_description, @@ -19,8 +19,6 @@ "Programming Language :: Python :: 3.10", "Operating System :: OS Independent", ], - install_requires=["transformers>=4.36.2, <5.0.0", "torch>=2.1.2, <3.0.0", "torchaudio>=2.1.2, <3.0.0", "pydub>=0.25.1, <1.0.0", "pyannote.audio>=3.1.1, <4.0.0", "speechbrain>=0.5.16, <1.0.0", "accelerate>=0.26.1, <1.0.0", "faster-whisper>=0.10.1, <1.0.0", "openai-whisper>=20231117, <20240927", "assemblyai"], + install_requires=["transformers", "torch", "torchaudio", "pydub", "pyannote.audio", "speechbrain", "accelerate", "faster-whisper", "openai-whisper", "assemblyai"], python_requires=">=3.8", -) - -# ["transformers==4.36.2", "torch==2.1.2", "torchaudio==2.1.2", "pydub==0.25.1", "pyannote.audio==3.1.1", "speechbrain==0.5.16", "accelerate==0.26.1", "faster-whisper==0.10.1", "openai-whisper==20231117"] \ No newline at end of file +) \ No newline at end of file diff --git a/setup_instruction.md b/setup_instruction.md index bb32cda..f095700 100644 --- a/setup_instruction.md +++ b/setup_instruction.md @@ -9,7 +9,7 @@ for publishing: pip install twine for install locally for testing: - pip install dist/speechlib-1.1.10-py3-none-any.whl + pip install dist/speechlib-1.1.11-py3-none-any.whl finally run: twine upload dist/* diff --git a/speechlib/run.py b/speechlib/run.py deleted file mode 100644 index 7fcfed2..0000000 --- a/speechlib/run.py +++ /dev/null @@ -1,12 +0,0 @@ -from speechlib import Transcriptor - -file = "example1.wav" -voice_folder = "voices" -language = "sinhala" -log_folder = "logs" - -transcriptor = Transcriptor(file, log_folder, language, voice_folder) - -res = transcriptor.transcribe() - -print("res", res) \ No newline at end of file diff --git a/speechlib/speaker_recognition.py b/speechlib/speaker_recognition.py index 0c95fdf..0ae899f 100644 --- a/speechlib/speaker_recognition.py +++ b/speechlib/speaker_recognition.py @@ -1,4 +1,4 @@ -from speechbrain.pretrained import SpeakerRecognition +from speechbrain.inference import SpeakerRecognition import os from pydub import AudioSegment from collections import defaultdict