From 3d232fdf21619562792b408b8dd50e15bd2a08e0 Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Tue, 21 Feb 2023 13:44:28 +0000 Subject: [PATCH 1/6] add speechmatics --- README.rst | 8 +++++++ examples/audio_transcribe.py | 10 +++++++++ speech_recognition/__init__.py | 41 ++++++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/README.rst b/README.rst index f9bde14e..3e1470d1 100644 --- a/README.rst +++ b/README.rst @@ -39,6 +39,7 @@ Speech recognition engine/API support: * `Tensorflow `__ * `Vosk API `__ (works offline) * `OpenAI whisper `__ (works offline) +* `Speechmatics ASR API `__ **Quickstart:** ``pip install SpeechRecognition``. See the "Installing" section for more details. @@ -95,6 +96,7 @@ To use all of the functionality of the library, you should have: * **FLAC encoder** (required only if the system is not x86-based Windows/Linux/OS X) * **Vosk** (required only if you need to use Vosk API speech recognition ``recognizer_instance.recognize_vosk``) * **Whisper** (required only if you need to use Whisper ``recognizer_instance.recognize_whisper``) +* **Speechmatics** (required only if you need to use Speechmatics ``recognizer_instance.recognize_speechmatics``) The following requirements are optional, but can improve or extend functionality in some situations: @@ -169,6 +171,12 @@ Whisper is **required if and only if you want to use whisper** (``recognizer_ins You can install it with ``python3 -m pip install git+https://github.com/openai/whisper.git soundfile``. +Speechmatics (for Speechmatics users) +~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Speechmatics is **required if and only if you want to use speechmatics** (``recognizer_instance.recognize_speechmatics``). + +You can install it with ``python3 -m pip install speechmatics-python``. 
+ Troubleshooting --------------- diff --git a/examples/audio_transcribe.py b/examples/audio_transcribe.py index 7806023f..5d91b25b 100644 --- a/examples/audio_transcribe.py +++ b/examples/audio_transcribe.py @@ -13,6 +13,16 @@ with sr.AudioFile(AUDIO_FILE) as source: audio = r.record(source) # read the entire audio file +# recognize speech using Speechmatics Speech to Text +API_KEY = "INSERT API KEY HERE" +try: + print("Speechmatics thinks you said " + r.recognize_ibm(audio, key=API_KEY)) +except sr.UnknownValueError: + print("Speechmatics could not understand audio") +except sr.RequestError as e: + print("Could not request results from the Speechmatics service; {0}".format(e)) + + # recognize speech using Sphinx try: print("Sphinx thinks you said " + r.recognize_sphinx(audio)) diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index 66ebc04c..ecc0bebb 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -1702,6 +1702,47 @@ def recognize_vosk(self, audio_data, language='en'): return finalRecognition + def recognize_speechmatics(self, audio_data, key=None, language="en", transcript_format="txt"): + """ + Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Speechmatics ASR + + The key value is your speechmatics API key. You can get an API key by creating an account and signing into the portal at https://portal.speechmatics.com/manage-access/. + + The recognition language is determined by ``language``, an RFC5646 language tag like "en" or "es". The full list of supported languages can be found at https://docs.speechmatics.com/introduction/supported-languages. + + Returns a text representation of the transcript by default. You can alson get a json representation of the transcript by setting transcript_format='json-v2', which comes with a range of meta-data about each word in the transcript. The full transcript schema is documented here: https://docs.speechmatics.com/features. 
You can also request an SRT format by setting `format='srt'` + + Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. + """ + assert isinstance(audio_data, AudioData), "Data must be audio data" + assert isinstance(key, str), "``key`` must be a string" + + try: + from speechmatics.models import ConnectionSettings, BatchTranscriptionConfig + from speechmatics.batch_client import BatchClient + from speechmatics.constants import BATCH_SELF_SERVICE_URL + except: + raise RequestError("missing speechmatics python module: install using `pip install speechmatics-python`") + + wav_data = audio_data.get_wav_data( + convert_rate=None if audio_data.sample_rate >= 16000 else 16000 # audio samples must be at least 16 kHz + ) + audio_input = ("audio_file.wav", wav_data) + settings = ConnectionSettings( + url=BATCH_SELF_SERVICE_URL, + auth_token=key, + ) + conf = BatchTranscriptionConfig( + language=language, + ) + with BatchClient(settings) as client: + job_id = client.submit_job( + audio=audio_input, + transcription_config=conf, + ) + transcript = client.wait_for_completion(job_id, transcription_format=transcript_format) + return transcript + def get_flac_converter(): """Returns the absolute path of a FLAC converter executable, or raises an OSError if none can be found.""" flac_converter = shutil_which("flac") # check for installed version first From cce0869c83642c5dde42c00bdf926c51a10aca07 Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Tue, 21 Feb 2023 13:58:49 +0000 Subject: [PATCH 2/6] add examples and tests --- examples/audio_transcribe.py | 6 +++--- examples/extended_results.py | 10 ++++++++++ examples/microphone_recognition.py | 9 +++++++++ tests/test_recognition.py | 18 ++++++++++++++++++ 4 files changed, 40 insertions(+), 3 deletions(-) diff --git 
a/examples/audio_transcribe.py b/examples/audio_transcribe.py index 5d91b25b..aab2be8b 100644 --- a/examples/audio_transcribe.py +++ b/examples/audio_transcribe.py @@ -13,10 +13,10 @@ with sr.AudioFile(AUDIO_FILE) as source: audio = r.record(source) # read the entire audio file -# recognize speech using Speechmatics Speech to Text -API_KEY = "INSERT API KEY HERE" +# recognize speech using Speechmatics +SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE" try: - print("Speechmatics thinks you said " + r.recognize_ibm(audio, key=API_KEY)) + print("Speechmatics thinks you said " + r.recognize_ibm(audio, key=SPEECHMATICS_KEY)) except sr.UnknownValueError: print("Speechmatics could not understand audio") except sr.RequestError as e: diff --git a/examples/extended_results.py b/examples/extended_results.py index 599c67f2..117f519b 100644 --- a/examples/extended_results.py +++ b/examples/extended_results.py @@ -16,6 +16,16 @@ with sr.AudioFile(AUDIO_FILE) as source: audio = r.record(source) # read the entire audio file +# recognize speech using Speechmatics +SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE" +try: + print("Speechmatics results:") + pprint(r.recognize_sphinx(audio, key=SPEECHMATICS_KEY, transcript_format="json-v2")) +except sr.UnknownValueError: + print("Speechmatics could not understand audio") +except sr.RequestError as e: + print("Speechmatics error; {0}".format(e)) + # recognize speech using Sphinx try: print("Sphinx thinks you said " + r.recognize_sphinx(audio)) diff --git a/examples/microphone_recognition.py b/examples/microphone_recognition.py index 56168b29..863abe87 100644 --- a/examples/microphone_recognition.py +++ b/examples/microphone_recognition.py @@ -10,6 +10,15 @@ print("Say something!") audio = r.listen(source) +# recognize speech using Speechmatics +SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE" +try: + print("Speechmatics thinks you said " + r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY)) +except sr.UnknownValueError: + 
print("Speechmatics could not understand audio") +except sr.RequestError as e: + print("Could not request results from Speechmatics service; {0}".format(e)) + # recognize speech using Sphinx try: print("Sphinx thinks you said " + r.recognize_sphinx(audio)) diff --git a/tests/test_recognition.py b/tests/test_recognition.py index 5759d657..7e86ea94 100644 --- a/tests/test_recognition.py +++ b/tests/test_recognition.py @@ -34,6 +34,24 @@ def test_google_chinese(self): with sr.AudioFile(self.AUDIO_FILE_ZH) as source: audio = r.record(source) self.assertEqual(r.recognize_google(audio, language="zh-CN"), u"砸自己的脚") + @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable") + def test_speechmatics_english(self): + r = sr.Recognizer() + with sr.AudioFile(self.AUDIO_FILE_EN) as source: audio = r.record(source) + self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"]), "One, two, three.") + + @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable") + def test_speechmatics_french(self): + r = sr.Recognizer() + with sr.AudioFile(self.AUDIO_FILE_FR) as source: audio = r.record(source) + self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="fr"), u"Essaye la dictée numéro un.") + + @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable") + def test_speechmatics_mandarin(self): + r = sr.Recognizer() + with sr.AudioFile(self.AUDIO_FILE_ZH) as source: audio = r.record(source) + self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="cmn"), u"砸自己的脚。") + @unittest.skipUnless("WIT_AI_KEY" in os.environ, "requires Wit.ai key to be specified in WIT_AI_KEY environment variable") def test_wit_english(self): r = sr.Recognizer() From 
e371938c2d46d036b7bc570fd101dc6df7e1c51d Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Tue, 21 Feb 2023 14:32:38 +0000 Subject: [PATCH 3/6] fix copy-paste errors --- README.rst | 2 +- examples/audio_transcribe.py | 2 +- examples/extended_results.py | 2 +- tests/test_recognition.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index 3e1470d1..604bc1a4 100644 --- a/README.rst +++ b/README.rst @@ -175,7 +175,7 @@ Speechmatics (for Speechmatics users) ~~~~~~~~~~~~~~~~~~~~~~~~~~~ Speechmatics is **required if and only if you want to use speechmatics** (``recognizer_instance.recognize_speechmatics``). -You can install it with ``python3 -m pip install speechmatics-python``. +You can install it with ``python3 -m pip install speechmatics-python``. You will also need an API key from `the Speechmatics portal <https://portal.speechmatics.com/manage-access/>`__. Troubleshooting --------------- diff --git a/examples/audio_transcribe.py b/examples/audio_transcribe.py index aab2be8b..0736b02a 100644 --- a/examples/audio_transcribe.py +++ b/examples/audio_transcribe.py @@ -16,7 +16,7 @@ # recognize speech using Speechmatics SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE" try: - print("Speechmatics thinks you said " + r.recognize_ibm(audio, key=SPEECHMATICS_KEY)) + print("Speechmatics thinks you said " + r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY)) except sr.UnknownValueError: print("Speechmatics could not understand audio") except sr.RequestError as e: diff --git a/examples/extended_results.py b/examples/extended_results.py index 117f519b..c848212b 100644 --- a/examples/extended_results.py +++ b/examples/extended_results.py @@ -20,7 +20,7 @@ SPEECHMATICS_KEY = "INSERT SPEECHMATICS API KEY HERE" try: print("Speechmatics results:") - pprint(r.recognize_sphinx(audio, key=SPEECHMATICS_KEY, transcript_format="json-v2")) + pprint(r.recognize_speechmatics(audio, key=SPEECHMATICS_KEY, transcript_format="json-v2")) except sr.UnknownValueError: print("Speechmatics could not understand audio") 
except sr.RequestError as e: diff --git a/tests/test_recognition.py b/tests/test_recognition.py index 7e86ea94..96fade84 100644 --- a/tests/test_recognition.py +++ b/tests/test_recognition.py @@ -44,7 +44,7 @@ def test_speechmatics_english(self): def test_speechmatics_french(self): r = sr.Recognizer() with sr.AudioFile(self.AUDIO_FILE_FR) as source: audio = r.record(source) - self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="fr"), u"Essaye la dictée numéro un.") + self.assertEqual(r.recognize_speechmatics(audio, key=os.environ["SPEECHMATICS_KEY"], language="fr"), u"C'est la dictée numéro un.") @unittest.skipUnless("SPEECHMATICS_KEY" in os.environ, "requires Speechmatics key to be specified in SPEECHMATICS_KEY environment variable") def test_speechmatics_mandarin(self): From 650adc3bafa5737bcc88a31a4dbfc6120b3d1551 Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Tue, 21 Feb 2023 15:30:02 +0000 Subject: [PATCH 4/6] remove sample rate change --- speech_recognition/__init__.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index ecc0bebb..c836f257 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -1724,9 +1724,7 @@ def recognize_speechmatics(self, audio_data, key=None, language="en", transcript except: raise RequestError("missing speechmatics python module: install using `pip install speechmatics-python`") - wav_data = audio_data.get_wav_data( - convert_rate=None if audio_data.sample_rate >= 16000 else 16000 # audio samples must be at least 16 kHz - ) + wav_data = audio_data.get_wav_data() audio_input = ("audio_file.wav", wav_data) settings = ConnectionSettings( url=BATCH_SELF_SERVICE_URL, From 800567ae7905544b459a251d37dc3fc3615f0d8b Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Tue, 21 Feb 2023 15:35:03 +0000 Subject: [PATCH 5/6] add speechmatics to library-reference.rst --- 
reference/library-reference.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/reference/library-reference.rst b/reference/library-reference.rst index 7323bd9b..6ca27092 100644 --- a/reference/library-reference.rst +++ b/reference/library-reference.rst @@ -314,6 +314,19 @@ You can translate the result to english with Whisper by passing translate=True Other values are passed directly to whisper. See https://github.com/openai/whisper/blob/main/whisper/transcribe.py for all options +``recognize_speechmatics(self, audio_data, key=None, language="en", transcript_format="txt")`` +---------------------------------------------------------------------------------------------- + +Performs speech recognition on ``audio_data`` (an ``AudioData`` instance), using the Speechmatics ASR + +The key value is your speechmatics API key. You can get an API key by creating an account and signing into the portal at https://portal.speechmatics.com/manage-access/. + +The recognition language is determined by ``language``, an RFC5646 language tag like "en" or "es". The full list of supported languages can be found at https://docs.speechmatics.com/introduction/supported-languages. + +Returns a text representation of the transcript by default. You can alson get a json representation of the transcript by setting transcript_format='json-v2', which comes with a range of meta-data about each word in the transcript. The full transcript schema is documented here: https://docs.speechmatics.com/features. You can also request an SRT format by setting `format='srt'` + +Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. 
+ ``AudioSource`` --------------- From 8037f6aed15d66695459dc50ee7aa7b984dc2f8d Mon Sep 17 00:00:00 2001 From: Tudor Evans Date: Tue, 21 Feb 2023 15:37:43 +0000 Subject: [PATCH 6/6] add speechmatics to library-reference.rst --- reference/library-reference.rst | 2 +- speech_recognition/__init__.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/reference/library-reference.rst b/reference/library-reference.rst index 6ca27092..c6bb2b43 100644 --- a/reference/library-reference.rst +++ b/reference/library-reference.rst @@ -325,7 +325,7 @@ The recognition language is determined by ``language``, an RFC5646 language tag Returns a text representation of the transcript by default. You can alson get a json representation of the transcript by setting transcript_format='json-v2', which comes with a range of meta-data about each word in the transcript. The full transcript schema is documented here: https://docs.speechmatics.com/features. You can also request an SRT format by setting `format='srt'` -Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. +Raises errors directly from the speechmatics-python package. Read more at https://speechmatics.github.io/speechmatics-python/exceptions.html. ``AudioSource`` --------------- diff --git a/speech_recognition/__init__.py b/speech_recognition/__init__.py index c836f257..2f3e62bb 100644 --- a/speech_recognition/__init__.py +++ b/speech_recognition/__init__.py @@ -1712,7 +1712,7 @@ def recognize_speechmatics(self, audio_data, key=None, language="en", transcript Returns a text representation of the transcript by default. You can alson get a json representation of the transcript by setting transcript_format='json-v2', which comes with a range of meta-data about each word in the transcript. 
The full transcript schema is documented here: https://docs.speechmatics.com/features. You can also request an SRT format by setting `format='srt'` - Raises a ``speech_recognition.UnknownValueError`` exception if the speech is unintelligible. Raises a ``speech_recognition.RequestError`` exception if the speech recognition operation failed, if the key isn't valid, or if there is no internet connection. + Raises errors directly from the speechmatics-python package. Read more at https://speechmatics.github.io/speechmatics-python/exceptions.html. """ assert isinstance(audio_data, AudioData), "Data must be audio data" assert isinstance(key, str), "``key`` must be a string"