From 40d3b5b7491ed0e64288c424139634fbcbf10d0b Mon Sep 17 00:00:00 2001
From: Jamie McMillan <jamie.mcmillan@satellitevu.com>
Date: Mon, 7 Oct 2024 09:18:12 +0100
Subject: [PATCH 1/7] Permit minor and patch version updates

---
 requirements.txt | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index 5893b85..a945fe4 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,9 +1,9 @@
-transformers==4.36.2
-torch==2.1.2
-torchaudio==2.1.2
-pydub==0.25.1
-pyannote.audio==3.1.1
-speechbrain==0.5.16
-accelerate==0.26.1
-faster-whisper==0.10.1
-openai-whisper==20231117
\ No newline at end of file
+transformers>=4.36.2, <5.0.0
+torch>=2.1.2, <3.0.0
+torchaudio>=2.1.2, <3.0.0
+pydub>=0.25.1, <1.0.0
+pyannote.audio>=3.1.1, <4.0.0
+speechbrain>=0.5.16, <1.0.0
+accelerate>=0.26.1, <1.0.0
+faster-whisper>=0.10.1, <1.0.0
+openai-whisper>=20231117, <2024*

From 3c2442e54a9f173b1ee93ef4cb5ba9c201e49680 Mon Sep 17 00:00:00 2001
From: Jamie McMillan <jamie.mcmillan@satellitevu.com>
Date: Mon, 7 Oct 2024 09:29:39 +0100
Subject: [PATCH 2/7] More flexible requirements definition

---
 .gitignore       | 5 ++++-
 requirements.txt | 2 +-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/.gitignore b/.gitignore
index 5957730..eb4509c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,4 +2,7 @@ venv
 build
 dist
 speechlib.egg-info
-.env
\ No newline at end of file
+.env
+
+*.swp
+*.swo
diff --git a/requirements.txt b/requirements.txt
index a945fe4..476b184 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -6,4 +6,4 @@ pyannote.audio>=3.1.1, <4.0.0
 speechbrain>=0.5.16, <1.0.0
 accelerate>=0.26.1, <1.0.0
 faster-whisper>=0.10.1, <1.0.0
-openai-whisper>=20231117, <2024*
+openai-whisper>=20231117, <20240927

From f834682e56457ecb60e3df56e9524c67382ee993 Mon Sep 17 00:00:00 2001
From: Jamie McMillan <jamie.mcmillan@satellitevu.com>
Date: Mon, 7 Oct 2024 09:54:27 +0100
Subject: [PATCH 3/7] Ignoring additional editor and audio files

---
 .gitignore | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/.gitignore b/.gitignore
index eb4509c..b4d9460 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,8 @@ speechlib.egg-info
 
 *.swp
 *.swo
+
+# By default do not include these files for version control
+# Override this by using 'git add -f'
+*.wav
+*.mp3

From dc1bdf65ef67b7f7a880b900cca1c6a6333ad602 Mon Sep 17 00:00:00 2001
From: Jamie McMillan <jamie.mcmillan@satellitevu.com>
Date: Mon, 7 Oct 2024 09:54:54 +0100
Subject: [PATCH 4/7] Additional context encountered during bootstrapping an
 environment

---
 README.md | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index d92fe32..d8eadf0 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,9 @@ Transcriptor method takes 7 arguments.
 
 4. model size ("tiny", "small", "medium", "large", "large-v1", "large-v2", "large-v3")
 
-5. ACCESS_TOKEN: huggingface acccess token (also get permission to access `pyannote/speaker-diarization@2.1`)
+5. ACCESS_TOKEN: huggingface access token
+    1. Permission to access `pyannote/speaker-diarization@2.1` and `pyannote/segmentation`
+    2. Token requires permission for 'Read access to contents of all public gated repos you can access'
 
 6. voices_folder (contains speaker voice samples for speaker recognition)
 
@@ -211,4 +213,4 @@ This library uses following huggingface models:
 
 #### https://huggingface.co/speechbrain/spkrec-ecapa-voxceleb
 #### https://huggingface.co/Ransaka/whisper-tiny-sinhala-20k-8k-steps-v2
-#### https://huggingface.co/pyannote/speaker-diarization
\ No newline at end of file
+#### https://huggingface.co/pyannote/speaker-diarization

From b0aa4e3abedb4dbfbe850c554b05736e810a14f5 Mon Sep 17 00:00:00 2001
From: Jamie McMillan <jamie.mcmillan@satellitevu.com>
Date: Mon, 7 Oct 2024 09:56:13 +0100
Subject: [PATCH 5/7] Replaced the example code with an environment variable

To avoid accidental commits of API keys to the repo, the example script now reads the token from an environment variable. This is a bit safer, at the cost of additional user onboarding complexity. The README example should still use a hardcoded placeholder string, though, since that is helpful for readers.
---
 examples/transcribe.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/examples/transcribe.py b/examples/transcribe.py
index fc86d75..6b31a45 100644
--- a/examples/transcribe.py
+++ b/examples/transcribe.py
@@ -1,3 +1,4 @@
+import os
 from speechlib import Transcriptor
 
 file = "obama_zach.wav"  # your audio file
@@ -6,7 +7,7 @@
 log_folder = "logs"      # log folder for storing transcripts
 modelSize = "tiny"     # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3]
 quantization = False   # setting this 'True' may speed up the process but lower the accuracy
-ACCESS_TOKEN = "your hf key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
+ACCESS_TOKEN = os.getenv("HUGGING_FACE_API_KEY") # get permission to access pyannote/speaker-diarization@2.1 on huggingface and add this to an environment variable
 
 # quantization only works on faster-whisper
 transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization)
@@ -24,4 +25,4 @@
 res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")
 
 # use assembly ai model
-res = transcriptor.assemby_ai_model("your api key")
\ No newline at end of file
+res = transcriptor.assemby_ai_model("your api key")

From 297231ac58cd16471c4d8a117c2207fc8ae5a83d Mon Sep 17 00:00:00 2001
From: Navodplayer1 <navodpeiris1234@gmail.com>
Date: Thu, 10 Oct 2024 00:14:32 +0530
Subject: [PATCH 6/7] fixed strict versioning of dependencies

---
 examples/.gitignore    | 3 ++-
 examples/transcribe.py | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/examples/.gitignore b/examples/.gitignore
index 8e174ff..811a5c3 100644
--- a/examples/.gitignore
+++ b/examples/.gitignore
@@ -9,4 +9,5 @@ greek_convo_short.mp3
 greek_convo_short.wav
 my_test.py
 greek_convo.mp3
-greek_convo.wav
\ No newline at end of file
+greek_convo.wav
+.env
\ No newline at end of file
diff --git a/examples/transcribe.py b/examples/transcribe.py
index 6b31a45..5723e96 100644
--- a/examples/transcribe.py
+++ b/examples/transcribe.py
@@ -7,7 +7,7 @@
 log_folder = "logs"      # log folder for storing transcripts
 modelSize = "tiny"     # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3]
 quantization = False   # setting this 'True' may speed up the process but lower the accuracy
-ACCESS_TOKEN = os.getenv("HUGGING_FACE_API_KEY") # get permission to access pyannote/speaker-diarization@2.1 on huggingface and add this to an environment variable
+ACCESS_TOKEN = "huggingface api key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
 
 # quantization only works on faster-whisper
 transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization)
@@ -25,4 +25,4 @@
 res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")
 
 # use assembly ai model
-res = transcriptor.assemby_ai_model("your api key")
+res = transcriptor.assemby_ai_model("assemblyAI api key")

From 7eb565dbab25cbccfc8f6bca7e15854f91ef68aa Mon Sep 17 00:00:00 2001
From: Navodplayer1 <navodpeiris1234@gmail.com>
Date: Thu, 10 Oct 2024 00:15:37 +0530
Subject: [PATCH 7/7] changed requirements

---
 README.md            | 7 +++++--
 library.md           | 9 +++++++--
 setup.py             | 4 ++--
 setup_instruction.md | 2 +-
 4 files changed, 15 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index d8eadf0..7a97444 100644
--- a/README.md
+++ b/README.md
@@ -88,13 +88,16 @@ transcript will also indicate the timeframe in seconds where each speaker speaks
 ### Transcription example:
 
 ```
+import os
+from speechlib import Transcriptor
+
 file = "obama_zach.wav"  # your audio file
 voices_folder = "" # voices folder containing voice samples for recognition
 language = "en"          # language code
 log_folder = "logs"      # log folder for storing transcripts
 modelSize = "tiny"     # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3]
 quantization = False   # setting this 'True' may speed up the process but lower the accuracy
-ACCESS_TOKEN = "your hf key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
+ACCESS_TOKEN = "huggingface api key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
 
 # quantization only works on faster-whisper
 transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization)
@@ -112,7 +115,7 @@ res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt")
 res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")
 
 # use assembly ai model
-res = transcriptor.assemby_ai_model("your api key")
+res = transcriptor.assemby_ai_model("assemblyAI api key")
 
 res --> [["start", "end", "text", "speaker"], ["start", "end", "text", "speaker"]...]
 ```
diff --git a/library.md b/library.md
index aba74a4..b7af440 100644
--- a/library.md
+++ b/library.md
@@ -70,13 +70,16 @@ transcript will also indicate the timeframe in seconds where each speaker speaks
 ### Transcription example:
 
 ```
+import os
+from speechlib import Transcriptor
+
 file = "obama_zach.wav"  # your audio file
 voices_folder = "" # voices folder containing voice samples for recognition
 language = "en"          # language code
 log_folder = "logs"      # log folder for storing transcripts
 modelSize = "tiny"     # size of model to be used [tiny, small, medium, large-v1, large-v2, large-v3]
 quantization = False   # setting this 'True' may speed up the process but lower the accuracy
-ACCESS_TOKEN = "your hf key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
+ACCESS_TOKEN = "huggingface api key" # get permission to access pyannote/speaker-diarization@2.1 on huggingface
 
 # quantization only works on faster-whisper
 transcriptor = Transcriptor(file, log_folder, language, modelSize, ACCESS_TOKEN, voices_folder, quantization)
@@ -94,7 +97,9 @@ res = transcriptor.custom_whisper("D:/whisper_tiny_model/tiny.pt")
 res = transcriptor.huggingface_model("Jingmiao/whisper-small-chinese_base")
 
 # use assembly ai model
-res = transcriptor.assemby_ai_model("your api key")
+res = transcriptor.assemby_ai_model("assemblyAI api key")
+
+res --> [["start", "end", "text", "speaker"], ["start", "end", "text", "speaker"]...]
 ```
 
 #### if you don't want speaker names: keep voices_folder as an empty string ""
diff --git a/setup.py b/setup.py
index 0dbef84..c8bb6df 100644
--- a/setup.py
+++ b/setup.py
@@ -5,7 +5,7 @@
 
 setup(
     name="speechlib",
-    version="1.1.9",  
+    version="1.1.10",  
     description="speechlib is a library that can do speaker diarization, transcription and speaker recognition on an audio file to create transcripts with actual speaker names. This library also contain audio preprocessor functions.",
     packages=find_packages(),
     long_description=long_description,
@@ -19,7 +19,7 @@
         "Programming Language :: Python :: 3.10",
         "Operating System :: OS Independent",
     ],
-    install_requires=["transformers", "torch", "torchaudio", "pydub", "pyannote.audio", "speechbrain==0.5.16", "accelerate", "faster-whisper", "openai-whisper", "assemblyai"],
+    install_requires=["transformers>=4.36.2, <5.0.0", "torch>=2.1.2, <3.0.0", "torchaudio>=2.1.2, <3.0.0", "pydub>=0.25.1, <1.0.0", "pyannote.audio>=3.1.1, <4.0.0", "speechbrain>=0.5.16, <1.0.0", "accelerate>=0.26.1, <1.0.0", "faster-whisper>=0.10.1, <1.0.0", "openai-whisper>=20231117, <20240927", "assemblyai"],
     python_requires=">=3.8",
 )
 
diff --git a/setup_instruction.md b/setup_instruction.md
index a07ae94..bb32cda 100644
--- a/setup_instruction.md
+++ b/setup_instruction.md
@@ -9,7 +9,7 @@ for publishing:
     pip install twine
 
 for install locally for testing:
-    pip install dist/speechlib-1.1.9-py3-none-any.whl
+    pip install dist/speechlib-1.1.10-py3-none-any.whl
 
 finally run:
     twine upload dist/*