From e1643e3caea449dea5ce3f5d74432772beb7d5be Mon Sep 17 00:00:00 2001 From: iyed Mdimegh Date: Mon, 30 Mar 2026 13:26:52 +0100 Subject: [PATCH] fix: enforce connection and read timeouts on external audio downloads --- app/data/audio_data.py | 8 ++++++-- tests/unit/test_data/test_audio_data.py | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/app/data/audio_data.py b/app/data/audio_data.py index ee94050..483dabf 100644 --- a/app/data/audio_data.py +++ b/app/data/audio_data.py @@ -35,7 +35,7 @@ def fetch_audio(self, url: str): logger.debug(f"[debug] [Data Layer] [AudioDataLayer] [fetch_audio] Downloading audio file from URL: {url}") # print(f"[debug] [Data Layer] [AudioDataLayer] [fetch_audio] Downloading audio file from URL: {url}") try: - url_response = requests.get(url) + url_response = requests.get(url, timeout=(10, 300)) if url_response.status_code != 200: # Capture and format the error message for the upper layers error_message = f'An error occurred during the HTTP request: HTTP status: {url_response.status_code}' @@ -45,6 +45,9 @@ def fetch_audio(self, url: str): # Load audio file into pydub from the response content return AudioSegment.from_file(BytesIO(url_response.content)) + + except requests.exceptions.Timeout as timeout_err: + raise RuntimeError(f'Audio download timed out (connection: 10s, read: 300s): {url}') from timeout_err except requests.exceptions.RequestException as req_err: # Handle any specific errors related to HTTP requests @@ -66,7 +69,8 @@ def fetch_audio(self, url: str): # print(f"[error] [Data Layer] [AudioDataLayer] [fetch_audio] {error_message}") return {'error': error_message} - + except RuntimeError: + raise except Exception as e: # Catch any other exceptions logger.error(f"[error] [Data Layer] [AudioDataLayer] [fetch_audio] An unexpected error occurred: {str(e)}") diff --git a/tests/unit/test_data/test_audio_data.py b/tests/unit/test_data/test_audio_data.py index 7ae39ef..b27ab32 100644 --- a/tests/unit/test_data/test_audio_data.py +++ b/tests/unit/test_data/test_audio_data.py @@ -88,7 +88,7 @@ def test_fetch_audio_from_url_failure(self, audio_data_layer, mock_requests__get result = audio_data_layer.fetch_audio(**payload) # Assert that mock_requests__get is called with the correct URL - mock_requests__get.assert_called_once_with(payload['url']) + mock_requests__get.assert_called_once_with(payload['url'], timeout=(10, 300)) assert result == {'error': f'An error occurred during the HTTP request: HTTP status: {mock_response.status_code}'} @@ -121,7 +121,7 @@ def test_fetch_audio_from_url_success(self, audio_data_layer, mock_requests__get result = audio_data_layer.fetch_audio(**payload) # Assert that mock_requests_get is called with the correct URL - mock_requests__get.assert_called_once_with(payload['url']) + mock_requests__get.assert_called_once_with(payload['url'], timeout=(10, 300)) # Assert the mock_io__BytesIO is called with the correct audio data mock_io__BytesIO.assert_called_once_with(mock_response.content) @@ -131,6 +131,21 @@ def test_fetch_audio_from_url_success(self, audio_data_layer, mock_requests__get assert result == 'mock_audio_data' + def test_fetch_audio_from_url_timeout(self, audio_data_layer, mock_requests__get): + """ + Test timeout during fetch from a URL. + """ + mock_requests__get.side_effect = requests.exceptions.Timeout('mock timeout') + + payload = self.args + with pytest.raises(RuntimeError) as err: + audio_data_layer.fetch_audio(**payload) + + # Assert that mock_requests__get is called with the correct URL and timeout + mock_requests__get.assert_called_once_with(payload['url'], timeout=(10, 300)) + + assert str(err.value) == f"Audio download timed out (connection: 10s, read: 300s): {payload['url']}" + def test_fetch_audio_from_local_path_success(self, audio_data_layer, mock_audio_segment__from_file,mock_os__path_exists,mock_os__path_isfile):