Skip to content

Commit

Permalink
Added default audio example
Browse files Browse the repository at this point in the history
  • Loading branch information
Pete Warden committed Apr 14, 2024
1 parent af4b428 commit 7192fc0
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 2 deletions.
Binary file added audio/two_cities.wav
Binary file not shown.
13 changes: 11 additions & 2 deletions lab4/speech_to_text.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import argparse
import time

from faster_whisper import WhisperModel, available_models

Expand All @@ -9,7 +10,7 @@
parser = argparse.ArgumentParser()
parser.add_argument(
"--audio_file",
default="daisy_HAL_9000.mp3",
default="../audio/two_cities.wav",
help="Input audio")
parser.add_argument(
"--model_size",
Expand All @@ -23,17 +24,25 @@
"--beam_size",
type=int,
default=5,
help=f"How wide an array to use for the beam search")
help=f"How wide an array to use for the beam search (smaller is faster but less accurate)")

args = parser.parse_args()

# Load the requested Whisper model for CPU inference with the chosen
# numeric precision.
model = WhisperModel(args.model_size, device="cpu",
                     compute_type=args.compute_type)

# NOTE: faster-whisper's transcribe() hands back a lazy generator of segments
# together with an info object, so this first measurement only covers the
# work done before the first segment is requested, not the decoding itself.
# perf_counter() is used instead of time() because it is monotonic and has
# the highest available resolution for measuring short intervals.
transcribe_start_time = time.perf_counter()
segments, info = model.transcribe(args.audio_file, beam_size=args.beam_size)
transcribe_end_time = time.perf_counter()
transcribe_duration = transcribe_end_time - transcribe_start_time
print("Transcribe time: {:.3f}ms".format(transcribe_duration * 1000))

print("Detected language '%s' with probability %f" %
      (info.language, info.language_probability))

# Consuming the generator is what drives the per-segment decoding, so this
# second measurement is where the bulk of the transcription time shows up.
segment_start_time = time.perf_counter()
for segment in segments:
    print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
segment_end_time = time.perf_counter()
segment_duration = segment_end_time - segment_start_time
print("Segment time: {:.3f}ms".format(segment_duration * 1000))

0 comments on commit 7192fc0

Please sign in to comment.