Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement S3 Lifecycle Policy for Temporary Audio Cleanup and Error Handling (Fixes #165) #169

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 70 additions & 0 deletions aws_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import boto3
from botocore.exceptions import ClientError
import logging

logger = logging.getLogger(__name__)

class S3Service:
    """Wrapper around a boto3 S3 client for short-lived audio objects.

    Constructing an instance creates the client and immediately installs a
    lifecycle rule on the bucket, so instantiation performs a network call
    and propagates ``ClientError`` if that call fails.
    """

    def __init__(self, bucket_name='audio-transcribe-temp'):
        """Create the S3 client and apply the lifecycle policy.

        Args:
            bucket_name: Bucket that holds the temporary audio objects.

        Raises:
            ClientError: If the lifecycle configuration cannot be applied.
        """
        self.s3_client = boto3.client('s3')
        self.bucket_name = bucket_name
        self._setup_lifecycle_policy()

    def _setup_lifecycle_policy(self):
        """
        Sets up a lifecycle policy that automatically deletes objects after 1 day
        and aborted multipart uploads after 1 day.

        NOTE(review): per the S3 API documentation this call replaces any
        lifecycle configuration already present on the bucket -- confirm the
        bucket is dedicated to temporary audio before relying on this.

        Raises:
            ClientError: Re-raised after logging when S3 rejects the request.
        """
        lifecycle_config = {
            'Rules': [
                {
                    'ID': 'DeleteTempAudioFiles',
                    'Status': 'Enabled',
                    'Filter': {
                        'Prefix': ''  # Apply to all objects
                    },
                    'Expiration': {
                        'Days': 1  # Delete objects after 1 day
                    },
                    'AbortIncompleteMultipartUpload': {
                        'DaysAfterInitiation': 1
                    }
                }
            ]
        }

        # Only the API call can raise ClientError, so keep the try body minimal.
        try:
            self.s3_client.put_bucket_lifecycle_configuration(
                Bucket=self.bucket_name,
                LifecycleConfiguration=lifecycle_config
            )
        except ClientError as e:
            logger.error(f"Failed to set up lifecycle policy: {str(e)}")
            raise
        logger.info(f"Successfully set up lifecycle policy for bucket {self.bucket_name}")

    def upload_audio(self, file_path, s3_key):
        """
        Uploads an audio file to S3 with error handling

        Args:
            file_path: Path of the local file to upload.
            s3_key: Object key to store the file under in ``self.bucket_name``.

        Returns:
            True when the upload succeeds.

        Raises:
            ClientError: Re-raised after logging.
            boto3.exceptions.S3UploadFailedError: Re-raised after logging.
        """
        # boto3's managed transfer wraps a failed upload in S3UploadFailedError
        # rather than surfacing ClientError, so both must be caught for the
        # failure to be logged. Imported locally to keep this block
        # self-contained with respect to the file's import section.
        from boto3.exceptions import S3UploadFailedError

        try:
            self.s3_client.upload_file(file_path, self.bucket_name, s3_key)
        except (ClientError, S3UploadFailedError) as e:
            logger.error(f"Failed to upload {file_path}: {str(e)}")
            raise
        logger.info(f"Successfully uploaded {file_path} to {s3_key}")
        return True

    def delete_audio(self, s3_key):
        """
        Deletes an audio file from S3 with error handling

        Args:
            s3_key: Object key to delete from ``self.bucket_name``.

        Returns:
            True when the delete request succeeds.

        Raises:
            ClientError: Re-raised after logging when S3 rejects the request.
        """
        try:
            self.s3_client.delete_object(Bucket=self.bucket_name, Key=s3_key)
        except ClientError as e:
            logger.error(f"Failed to delete {s3_key}: {str(e)}")
            raise
        logger.info(f"Successfully deleted {s3_key}")
        return True

# Fixes #165
50 changes: 47 additions & 3 deletions services.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,44 @@ def create_s3_bucket_if_not_exists(self, bucket_name):
Bucket=bucket_name,
CreateBucketConfiguration={'LocationConstraint': self.region_name}
)
# Set up lifecycle policy after bucket creation
self._setup_lifecycle_policy(bucket_name)
else:
raise
else:
# Ensure lifecycle policy exists for existing bucket
self._setup_lifecycle_policy(bucket_name)

def _setup_lifecycle_policy(self, bucket_name):
    """Install the temporary-audio lifecycle rule on ``bucket_name``.

    The rule applies to every key (empty prefix), expires objects after
    1 day and aborts incomplete multipart uploads 1 day after initiation.
    A ``ClientError`` from the S3 call is logged and swallowed, so the
    caller continues without a lifecycle policy.
    """
    expiry_rule = {
        'ID': 'DeleteTempAudioFiles',
        'Status': 'Enabled',
        'Filter': {'Prefix': ''},  # empty prefix matches all objects
        'Expiration': {'Days': 1},
        'AbortIncompleteMultipartUpload': {'DaysAfterInitiation': 1},
    }

    try:
        self.s3_client.put_bucket_lifecycle_configuration(
            Bucket=bucket_name,
            LifecycleConfiguration={'Rules': [expiry_rule]},
        )
    except ClientError as err:
        logger.error(f"Failed to set up lifecycle policy: {str(err)}")
        # Deliberately not re-raised: the policy is treated as non-critical.
        logger.warning("Continuing without lifecycle policy")
    else:
        logger.info(f"Successfully set up lifecycle policy for bucket {bucket_name}")

def upload_file_to_s3(self, file_content, bucket_name, object_key):
self.s3_client.upload_fileobj(BytesIO(file_content), bucket_name, object_key)
Expand Down Expand Up @@ -56,6 +92,7 @@ def __init__(self, aws_services: AWSServices):
self.bucket_name = 'audio-transcribe-temp'

def transcribe_audio(self, file_url: str) -> str:
object_key = None
try:
self.aws_services.create_s3_bucket_if_not_exists(self.bucket_name)
logger.info(f"S3 Bucket created/confirmed: {self.bucket_name}")
Expand All @@ -70,12 +107,19 @@ def transcribe_audio(self, file_url: str) -> str:
logger.info(f"Transcription job started: {job_name}")

transcription = self._wait_for_transcription(job_name)
self.aws_services.delete_file_from_s3(self.bucket_name, object_key)

return transcription
except Exception as e:
logger.error(f"An error occurred: {e}")
logger.error(f"An error occurred during transcription: {e}")
raise
finally:
# Always try to clean up the temporary file, even if transcription fails
if object_key:
try:
self.aws_services.delete_file_from_s3(self.bucket_name, object_key)
logger.info(f"Cleaned up temporary file: {object_key}")
except Exception as cleanup_error:
logger.warning(f"Failed to clean up temporary file {object_key}: {cleanup_error}")
# Don't raise the cleanup error as the file will be removed by lifecycle policy

def _download_audio(self, file_url: str) -> bytes:
response = requests.get(file_url)
Expand Down