Merge pull request #471 from jhj0517/fix/handle-silent-audio

Leave a message for corrupted audio instead of raising an error
jhj0517 · Jan 23, 2025 · 2b34a16 · 2b34a16
2 parents c7ed55b + 35f8c50
commit 2b34a16
Show file tree

Hide file tree

Showing 3 changed files with 32 additions and 1 deletion.
diff --git a/modules/utils/audio_manager.py b/modules/utils/audio_manager.py
@@ -0,0 +1,22 @@
+from typing import Optional, Union
+import soundfile as sf
+import os
+import numpy as np
+
+
+def validate_audio(audio: Optional[Union[str, np.ndarray]] = None) -> bool:
+    """Check whether an audio input is usable, without raising on bad files.
+
+    Parameters
+    ----------
+    audio: Optional[Union[str, np.ndarray]]
+        Path to an audio file, or audio data already held in a numpy array.
+
+    Returns
+    ----------
+    bool
+        True for a numpy array, or for an existing file that soundfile can
+        open and that reports at least one frame. False when no audio is
+        given, the path does not exist, the file cannot be opened, or it
+        reports zero frames.
+    """
+    # In-memory audio is accepted as-is; there is no file to inspect.
+    if isinstance(audio, np.ndarray):
+        return True
+
+    # The default is None, and os.path.exists(None) raises TypeError,
+    # so treat missing input as invalid before touching the filesystem.
+    if audio is None or not os.path.exists(audio):
+        return False
+
+    try:
+        with sf.SoundFile(audio) as f:
+            # A file that opens but holds zero frames is treated as invalid.
+            return f.frames > 0
+    except Exception:
+        # Any failure to open or parse the file means it is unusable; report
+        # that to the caller instead of propagating the error.
+        return False
diff --git a/modules/utils/subtitle_manager.py b/modules/utils/subtitle_manager.py
@@ -15,7 +15,8 @@
 def format_timestamp(
     seconds: float, always_include_hours: bool = True, decimal_marker: str = ","
 ) -> str:
-    assert seconds >= 0, "non-negative timestamp expected"
+    assert seconds is not None and seconds >= 0, "Wrong timestamp provided"
+
     milliseconds = round(seconds * 1000.0)
 
     hours = milliseconds // 3_600_000
@@ -232,6 +233,9 @@ def iterate_subtitles():
                     yield subtitle_start, subtitle_end, subtitle_text
         else:
             for segment in result["segments"]:
+                if segment["text"] is None:
+                    continue
+
                 segment_start = self.format_timestamp(segment["start"])
                 segment_end = self.format_timestamp(segment["end"])
                 segment_text = segment["text"].strip().replace("-->", "->")

diff --git a/modules/whisper/base_transcription_pipeline.py b/modules/whisper/base_transcription_pipeline.py
@@ -19,6 +19,7 @@
 from modules.utils.subtitle_manager import *
 from modules.utils.youtube_manager import get_ytdata, get_ytaudio
 from modules.utils.files_manager import get_media_files, format_gradio_files, load_yaml, save_yaml, read_file
+from modules.utils.audio_manager import validate_audio
 from modules.whisper.data_classes import *
 from modules.diarize.diarizer import Diarizer
 from modules.vad.silero_vad import SileroVAD
@@ -108,6 +109,10 @@ def run(self,
         elapsed_time: float
             elapsed time for running
         """
+        if not validate_audio(audio):
+            logger.info(f"The audio file {audio} is not able to open or corrupted. Please check the file.")
+            return [Segment()], -1
+
         params = TranscriptionPipelineParams.from_list(list(pipeline_params))
         params = self.validate_gradio_values(params)
         bgm_params, vad_params, whisper_params, diarization_params = params.bgm_separation, params.vad, params.whisper, params.diarization