fixed numpy issue and updated code to use speechbrain 1.0

NavodPeiris · Feb 11, 2025 · 87d6d87 · 87d6d87
1 parent fcd9ccb
commit 87d6d87
Show file tree

Hide file tree

Showing 6 changed files with 19 additions and 33 deletions.
diff --git a/examples/transcribe.py b/examples/transcribe.py
@@ -7,7 +7,7 @@
 log_folder = "logs"      # log folder for storing transcripts
 modelSize = "tiny"     # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3]
 quantization = False   # setting this 'True' may speed up the process but lower the accuracy
-ACCESS_TOKEN = "huggingface api key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
+ACCESS_TOKEN = "huggingface access token" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
 
 # quantization only works on faster-whisper
 transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization)
@@ -16,13 +16,13 @@
 res = transcriptor.whisper()
 
 # use faster-whisper (simply faster)
-res = transcriptor.faster_whisper()
+#res = transcriptor.faster_whisper()
 
 # use a custom trained whisper model
-res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt")
+#res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt")
 
 # use a huggingface whisper model
-res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")
+#res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")
 
 # use assembly ai model
-res = transcriptor.assemby_ai_model("assemblyAI api key")
+#res = transcriptor.assemby_ai_model("assemblyAI api key")
diff --git a/requirements.txt b/requirements.txt
@@ -1,9 +1,9 @@
-transformers>=4.36.2, <5.0.0
-torch>=2.1.2, <3.0.0
-torchaudio>=2.1.2, <3.0.0
-pydub>=0.25.1, <1.0.0
-pyannote.audio>=3.1.1, <4.0.0
-speechbrain>=0.5.16, <1.0.0
-accelerate>=0.26.1, <1.0.0
-faster-whisper>=0.10.1, <1.0.0
-openai-whisper>=20231117, <20240927
+transformers
+torch
+torchaudio
+pydub
+pyannote.audio
+speechbrain
+accelerate
+faster-whisper
+openai-whisper
diff --git a/setup.py b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="speechlib",
-    version="1.1.10",  
+    version="1.1.11",  
     description="speechlib is a library that can do speaker diarization, transcription and speaker recognition on an audio file to create transcripts with actual speaker names. This library also contain audio preprocessor functions.",
     packages=find_packages(),
     long_description=long_description,
@@ -19,8 +19,6 @@
         "Programming Language :: Python :: 3.10",
         "Operating System :: OS Independent",
     ],
-    install_requires=["transformers>=4.36.2, <5.0.0", "torch>=2.1.2, <3.0.0", "torchaudio>=2.1.2, <3.0.0", "pydub>=0.25.1, <1.0.0", "pyannote.audio>=3.1.1, <4.0.0", "speechbrain>=0.5.16, <1.0.0", "accelerate>=0.26.1, <1.0.0", "faster-whisper>=0.10.1, <1.0.0", "openai-whisper>=20231117, <20240927", "assemblyai"],
+    install_requires=["transformers", "torch", "torchaudio", "pydub", "pyannote.audio", "speechbrain", "accelerate", "faster-whisper", "openai-whisper", "assemblyai"],
     python_requires=">=3.8",
-)
-
-# ["transformers==4.36.2", "torch==2.1.2", "torchaudio==2.1.2", "pydub==0.25.1", "pyannote.audio==3.1.1", "speechbrain==0.5.16", "accelerate==0.26.1", "faster-whisper==0.10.1", "openai-whisper==20231117"]
+)
diff --git a/setup_instruction.md b/setup_instruction.md
@@ -9,7 +9,7 @@ for publishing:
     pip install twine
 
 for install locally for testing:
-    pip install dist/speechlib-1.1.10-py3-none-any.whl
+    pip install dist/speechlib-1.1.11-py3-none-any.whl
 
 finally run:
     twine upload dist/*

diff --git a/speechlib/run.py b/speechlib/run.py
diff --git a/speechlib/speaker_recognition.py b/speechlib/speaker_recognition.py
@@ -1,4 +1,4 @@
-from speechbrain.pretrained import SpeakerRecognition
+from speechbrain.inference import SpeakerRecognition
 import os
 from pydub import AudioSegment
 from collections import defaultdict