Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement S3 Lifecycle Policy for Temporary Audio Cleanup and Error Handling (Fixes #172) #174

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions services.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,31 @@ def create_s3_bucket_if_not_exists(self, bucket_name):
)
else:
raise

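# Set up a lifecycle policy so objects are cleaned up automatically about one day after creation (S3 expiration is day-granular, so removal is not exactly 24 hours)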
try:
lifecycle_config = {
'Rules': [
{
'ID': 'DeleteTempAudioFiles',
'Status': 'Enabled',
'Filter': {
'Prefix': '' # Apply to all objects
},
'Expiration': {
'Days': 1 # Delete objects after 24 hours
}
}
]
}
self.s3_client.put_bucket_lifecycle_configuration(
Bucket=bucket_name,
LifecycleConfiguration=lifecycle_config
)
logger.info(f"Successfully set up lifecycle policy for bucket {bucket_name}")
except ClientError as e:
logger.error(f"Failed to set up lifecycle policy: {str(e)}")
# Don't raise the error as the bucket is still usable without the policy

def upload_file_to_s3(self, file_content, bucket_name, object_key):
self.s3_client.upload_fileobj(BytesIO(file_content), bucket_name, object_key)
Expand Down
67 changes: 67 additions & 0 deletions utils/aws_services.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import boto3
from botocore.exceptions import ClientError
import logging

logger = logging.getLogger(__name__)

class S3Service:
    """Wrapper around a boto3 S3 client for short-lived audio objects.

    Constructing the service creates the client and immediately installs a
    bucket lifecycle rule so that uploaded objects are cleaned up by S3 even
    if the application never deletes them explicitly.
    """

    def __init__(self, bucket_name="audiotranscribetemp", *, expiration_days=1):
        """Create the S3 client and apply the cleanup lifecycle policy.

        Args:
            bucket_name: Name of the bucket that stores the temporary audio.
            expiration_days: Whole number of days after which S3 expires
                objects (and aborts unfinished multipart uploads).

        Raises:
            ClientError: If the lifecycle policy cannot be applied.
        """
        self.s3_client = boto3.client('s3')
        self.bucket_name = bucket_name
        self.expiration_days = expiration_days
        self._setup_lifecycle_policy()

    def _setup_lifecycle_policy(self):
        """Install the bucket-wide lifecycle rule used for automatic cleanup.

        The rule applies to every object (empty prefix). S3 expiration is
        day-granular: the expiry time is rounded up to the next midnight UTC
        and removal happens asynchronously, so objects can outlive
        ``expiration_days * 24`` hours by a noticeable margin.

        NOTE(review): ``put_bucket_lifecycle_configuration`` replaces the
        bucket's whole lifecycle configuration, so any rules configured
        elsewhere on this bucket are overwritten -- confirm the bucket is
        dedicated to temporary audio.

        Raises:
            ClientError: If S3 rejects the configuration; the error is logged
                and re-raised.
        """
        lifecycle_config = {
            'Rules': [
                {
                    'ID': 'DeleteTempAudioFiles',
                    'Status': 'Enabled',
                    'Filter': {
                        'Prefix': '',  # Apply to all objects
                    },
                    'Expiration': {
                        'Days': self.expiration_days,
                    },
                    # Expiration does not remove the parts of multipart
                    # uploads that never completed; abort them as well so
                    # failed large uploads do not accumulate in the bucket.
                    'AbortIncompleteMultipartUpload': {
                        'DaysAfterInitiation': self.expiration_days,
                    },
                }
            ]
        }

        try:
            self.s3_client.put_bucket_lifecycle_configuration(
                Bucket=self.bucket_name,
                LifecycleConfiguration=lifecycle_config,
            )
        except ClientError as e:
            logger.error("Failed to set up lifecycle policy: %s", e)
            raise
        logger.info(
            "Successfully set up lifecycle policy for bucket %s",
            self.bucket_name,
        )

    def upload_audio(self, file_path, object_name):
        """Upload a local audio file to the bucket.

        Args:
            file_path: Path of the local file to upload.
            object_name: Key to store the object under.

        Returns:
            True if the upload succeeded, False if S3 reported a failure.
        """
        try:
            self.s3_client.upload_file(file_path, self.bucket_name, object_name)
        # upload_file goes through the managed transfer layer, which reports
        # failed transfers as S3UploadFailedError rather than ClientError, so
        # both must be handled for the "return False" contract to hold.
        except (ClientError, boto3.exceptions.S3UploadFailedError) as e:
            logger.error("Failed to upload %s: %s", object_name, e)
            return False
        logger.info(
            "Successfully uploaded %s to %s", object_name, self.bucket_name
        )
        return True

    def delete_audio(self, object_name):
        """Delete an audio object from the bucket.

        The lifecycle policy eventually removes objects on its own; calling
        this after successful processing simply cleans up immediately.

        Args:
            object_name: Key of the object to delete.

        Returns:
            True if the delete request succeeded, False on a ClientError.
        """
        try:
            self.s3_client.delete_object(Bucket=self.bucket_name, Key=object_name)
        except ClientError as e:
            logger.error("Failed to delete %s: %s", object_name, e)
            return False
        logger.info(
            "Successfully deleted %s from %s", object_name, self.bucket_name
        )
        return True