diff --git a/apps/challenges/apps.py b/apps/challenges/apps.py index b17c978e1c..e8462f4556 100644 --- a/apps/challenges/apps.py +++ b/apps/challenges/apps.py @@ -5,3 +5,6 @@ class ChallengesConfig(AppConfig): name = "challenges" + + def ready(self): + import challenges.signals # noqa diff --git a/apps/challenges/aws_utils.py b/apps/challenges/aws_utils.py index 622756d9c1..e11e8af44a 100644 --- a/apps/challenges/aws_utils.py +++ b/apps/challenges/aws_utils.py @@ -4,15 +4,23 @@ import random import string import uuid +from datetime import timedelta from http import HTTPStatus import yaml from accounts.models import JwtToken from base.utils import get_boto3_client, send_email from botocore.exceptions import ClientError +from challenges.models import Challenge, ChallengePhase from django.conf import settings from django.core import serializers from django.core.files.temp import NamedTemporaryFile +from django.core.mail import EmailMultiAlternatives +from django.template.loader import render_to_string +from django.utils import timezone +from django.utils.html import strip_tags +from hosts.utils import is_user_a_host_of_challenge +from jobs.models import Submission from evalai.celery import app @@ -298,6 +306,10 @@ def register_task_def_by_challenge_pk(client, queue_name, challenge): ] challenge.task_def_arn = task_def_arn challenge.save() + # Update CloudWatch log retention policy on task definition registration + update_challenge_log_retention_on_task_def_registration( + challenge + ) return response except ClientError as e: logger.exception(e) @@ -1077,6 +1089,8 @@ def scale_resources(challenge, worker_cpu_cores, worker_memory): challenge.task_def_arn = task_def_arn challenge.save() + # Update CloudWatch log retention policy on resource scaling + update_challenge_log_retention_on_task_def_registration(challenge) force_new_deployment = False service_name = f"{queue_name}_service" num_of_tasks = challenge.workers @@ -1279,6 +1293,8 @@ def restart_workers_signal_callback(sender, instance, field_name, **kwargs): template_id=template_id, template_data=template_data, ) + # Update CloudWatch log retention policy on restart + update_challenge_log_retention_on_restart(challenge) def get_logs_from_cloudwatch( @@ -1832,6 +1848,8 @@ def challenge_approval_callback(sender, instance, field_name, **kwargs): ) else: construct_and_send_worker_start_mail(challenge) + # Update CloudWatch log retention policy on approval + update_challenge_log_retention_on_approval(challenge) if prev and not curr: if challenge.workers: @@ -1871,3 +1889,477 @@ def update_sqs_retention_period_task(challenge): for obj in serializers.deserialize("json", challenge): challenge_obj = obj.object return update_sqs_retention_period(challenge_obj) + + +def calculate_retention_period_days(challenge_end_date, challenge=None): + """Calculate retention period in days based on challenge end date and consent.""" + if challenge and challenge.retention_policy_consent: + return challenge.log_retention_days_override or 30 + return 3653 # Max AWS retention (10 years) for indefinite retention + + +def map_retention_days_to_aws_values(days): + """Map retention days to valid AWS CloudWatch retention values.""" + valid_periods = [ + 1, + 3, + 5, + 7, + 14, + 30, + 60, + 90, + 120, + 150, + 180, + 365, + 400, + 545, + 731, + 1827, + 3653, + ] + return next((p for p in valid_periods if p >= days), valid_periods[-1]) + + +def set_cloudwatch_log_retention(challenge_pk, retention_days=None): + """Set CloudWatch log retention policy for a challenge's log 
group.""" + from .utils import get_aws_credentials_for_challenge + + try: + challenge = Challenge.objects.get(pk=challenge_pk) + + if not challenge.retention_policy_consent: + return { + "error": f"Challenge {challenge_pk} host has not consented to retention policy. Please obtain consent before applying retention policies. Without consent, data is retained indefinitely for safety.", + "requires_consent": True, + } + + phases = ChallengePhase.objects.filter(challenge_id=challenge_pk) + if not phases.exists(): + return {"error": f"No phases found for challenge {challenge_pk}"} + + latest_end_date = max( + phase.end_date for phase in phases if phase.end_date + ) + + if retention_days is None: + retention_days = ( + challenge.log_retention_days_override + or calculate_retention_period_days(latest_end_date, challenge) + ) + + aws_retention_days = map_retention_days_to_aws_values(retention_days) + + # Get log group name + log_group_name = get_log_group_name(challenge_pk) + logs_client = get_boto3_client( + "logs", get_aws_credentials_for_challenge(challenge_pk) + ) + + logs_client.put_retention_policy( + logGroupName=log_group_name, retentionInDays=aws_retention_days + ) + + logger.info( + f"Set CloudWatch log retention for challenge {challenge_pk} to {aws_retention_days} days" + ) + + return { + "success": True, + "retention_days": aws_retention_days, + "log_group": log_group_name, + "host_consent": challenge.retention_policy_consent, + } + + except Exception as e: + logger.exception( + f"Failed to set log retention for challenge {challenge_pk}" + ) + return {"error": str(e)} + + +def calculate_submission_retention_date(challenge_phase): + """Calculate when a submission becomes eligible for retention cleanup.""" + from datetime import timedelta + + if not challenge_phase.end_date or challenge_phase.is_public: + return None + + challenge = challenge_phase.challenge + if challenge.retention_policy_consent: + retention_days = calculate_retention_period_days( + challenge_phase.end_date, challenge + ) + return challenge_phase.end_date + timedelta(days=retention_days) + return None + + +def delete_submission_files_from_storage(submission): + """Delete submission files from S3 storage while preserving database records.""" + from .utils import get_aws_credentials_for_challenge + + deleted_files = [] + failed_files = [] + + try: + challenge_pk = submission.challenge_phase.challenge.pk + challenge_aws_keys = get_aws_credentials_for_challenge(challenge_pk) + s3_client = get_boto3_client("s3", challenge_aws_keys) + bucket_name = challenge_aws_keys["AWS_STORAGE_BUCKET_NAME"] + + # List of file fields to delete + file_fields = [ + "input_file", + "submission_input_file", + "stdout_file", + "stderr_file", + "environment_log_file", + "submission_result_file", + "submission_metadata_file", + ] + + for field_name in file_fields: + file_field = getattr(submission, field_name, None) + if file_field and file_field.name: + try: + # Delete from S3 + s3_client.delete_object( + Bucket=bucket_name, Key=file_field.name + ) + deleted_files.append(file_field.name) + # Clear the file field in the database + file_field.delete(save=False) + except ClientError as e: + error_code = e.response.get("Error", {}).get( + "Code", "Unknown" + ) + if ( + error_code != "NoSuchKey" + ): # Ignore if file doesn't exist + failed_files.append( + {"file": file_field.name, "error": str(e)} + ) + logger.warning( + f"Failed to delete {file_field.name}: {e}" + ) + + # Mark submission as having artifacts deleted + submission.is_artifact_deleted = 
True + submission.artifact_deletion_date = timezone.now() + submission.save( + update_fields=["is_artifact_deleted", "artifact_deletion_date"] + ) + + logger.info( + f"Deleted {len(deleted_files)} files for submission {submission.pk}" + ) + + return { + "success": True, + "deleted_files": deleted_files, + "failed_files": failed_files, + "submission_id": submission.pk, + } + + except Exception as e: + logger.exception( + f"Error deleting files for submission {submission.pk}" + ) + return { + "success": False, + "error": str(e), + "submission_id": submission.pk, + } + + +@app.task +def cleanup_expired_submission_artifacts(): + """Periodic task to clean up expired submission artifacts.""" + logger.info("Starting cleanup of expired submission artifacts") + + eligible_submissions = Submission.objects.filter( + retention_eligible_date__lte=timezone.now(), + retention_eligible_date__isnull=False, + is_artifact_deleted=False, + ).select_related("challenge_phase__challenge") + + if not eligible_submissions.exists(): + logger.info("No submissions eligible for cleanup") + return { + "total_processed": 0, + "successful_deletions": 0, + "failed_deletions": 0, + } + + successful_deletions = 0 + failed_deletions = 0 + errors = [] + + for submission in eligible_submissions: + result = delete_submission_files_from_storage(submission) + if result["success"]: + successful_deletions += 1 + else: + failed_deletions += 1 + errors.append( + { + "submission_id": submission.pk, + "challenge_id": submission.challenge_phase.challenge.pk, + "error": result.get("error", "Unknown error"), + } + ) + + logger.info( + f"Cleanup completed: {successful_deletions} successful, {failed_deletions} failed" + ) + + return { + "total_processed": ( + len(eligible_submissions) + if hasattr(eligible_submissions, "__len__") + else eligible_submissions.count() + ), + "successful_deletions": successful_deletions, + "failed_deletions": failed_deletions, + "errors": errors, + } + + +@app.task +def update_submission_retention_dates(): + """Update retention dates for submissions based on current challenge settings.""" + logger.info("Updating submission retention dates") + + # Get submissions that need retention date updates + submissions = Submission.objects.filter( + retention_eligible_date__isnull=True, + challenge_phase__end_date__isnull=False, + challenge_phase__is_public=False, + ).select_related("challenge_phase__challenge") + + updated_count = 0 + for submission in submissions: + retention_date = calculate_submission_retention_date( + submission.challenge_phase + ) + if retention_date != submission.retention_eligible_date: + submission.retention_eligible_date = retention_date + submission.save(update_fields=["retention_eligible_date"]) + updated_count += 1 + + logger.info(f"Updated retention dates for {updated_count} submissions") + return {"updated_count": updated_count} + + +def send_template_email( + recipient_email, + subject, + template_name, + template_context, + sender_email=None, + reply_to=None, +): + """Send email using template.""" + try: + html_content = render_to_string(template_name, template_context) + text_content = strip_tags(html_content) + + email = EmailMultiAlternatives( + subject=subject, + body=text_content, + from_email=sender_email or settings.CLOUDCV_TEAM_EMAIL, + to=[recipient_email], + reply_to=[reply_to] if reply_to else None, + ) + email.attach_alternative(html_content, "text/html") + email.send() + + logger.info(f"Email sent successfully to {recipient_email}") + return True + except Exception as e: + 
logger.error(f"Failed to send email to {recipient_email}: {str(e)}") + return False + + +def send_retention_warning_email( + challenge, recipient_email, submission_count, warning_date +): + """Send retention warning email to challenge host.""" + template_context = { + "CHALLENGE_NAME": challenge.title, + "CHALLENGE_DESCRIPTION": challenge.description, + "SUBMISSION_COUNT": submission_count, + "WARNING_DATE": warning_date.strftime("%Y-%m-%d"), + "CHALLENGE_URL": f"{settings.EVALAI_API_SERVER}/web/challenge/{challenge.pk}", + } + + if challenge.image: + template_context["CHALLENGE_IMAGE_URL"] = challenge.image.url + + subject = f"⚠️ Retention Warning: {challenge.title} - {submission_count} submissions will be deleted in 14 days" + + return send_template_email( + recipient_email=recipient_email, + subject=subject, + template_name="challenges/retention_warning.html", + template_context=template_context, + sender_email=settings.CLOUDCV_TEAM_EMAIL, + ) + + +@app.task +def weekly_retention_notifications_and_consent_log(): + """Send warning notifications and log consent changes.""" + logger.info("Processing retention notifications and consent logging") + + # Send warnings for submissions expiring in 14 days + warning_date = timezone.now() + timedelta(days=14) + warning_submissions = Submission.objects.filter( + retention_eligible_date__date=warning_date.date(), + retention_eligible_date__isnull=False, + is_artifact_deleted=False, + ).select_related("challenge_phase__challenge__creator") + + notifications_sent = 0 + if warning_submissions.exists(): + # Group by challenge + challenges_to_notify = {} + for submission in warning_submissions: + challenge = submission.challenge_phase.challenge + if challenge.pk not in challenges_to_notify: + challenges_to_notify[challenge.pk] = { + "challenge": challenge, + "submission_count": 0, + } + challenges_to_notify[challenge.pk]["submission_count"] += 1 + + for challenge_data in challenges_to_notify.values(): + challenge = challenge_data["challenge"] + submission_count = challenge_data["submission_count"] + + if not challenge.inform_hosts: + continue + + try: + emails = challenge.creator.get_all_challenge_host_email() + for email in emails: + if send_retention_warning_email( + challenge=challenge, + recipient_email=email, + submission_count=submission_count, + warning_date=warning_date, + ): + notifications_sent += 1 + break # One successful email per challenge is enough + except Exception as e: + logger.error( + f"Failed to send retention warning email to {email} for challenge {challenge.pk}: {e}" + ) + + # Log recent consent changes + one_week_ago = timezone.now() - timedelta(days=7) + recent_consents = Challenge.objects.filter( + retention_policy_consent=True, + retention_policy_consent_date__gte=one_week_ago, + ).order_by("-retention_policy_consent_date") + + if recent_consents.exists(): + logger.info( + f"[RetentionConsent] {recent_consents.count()} consent changes in the last week:" + ) + for challenge in recent_consents: + consent_date = challenge.retention_policy_consent_date.strftime( + "%Y-%m-%d %H:%M:%S" + ) + consent_by = ( + challenge.retention_policy_consent_by.username + if challenge.retention_policy_consent_by + else "Unknown" + ) + logger.info( + f"[RetentionConsent] ✅ {consent_date} | Challenge {challenge.pk}: {challenge.title[:50]}" + ) + logger.info(f"[RetentionConsent] Consent by: {consent_by}") + if challenge.retention_policy_notes: + logger.info( + f"[RetentionConsent] Notes: {challenge.retention_policy_notes}" + ) + 
logger.info("[RetentionConsent] End of weekly consent change summary.") + + return {"notifications_sent": notifications_sent} + + +def update_challenge_log_retention(challenge): + """Update CloudWatch log retention for a challenge.""" + if not settings.DEBUG: + try: + result = set_cloudwatch_log_retention(challenge.pk) + if result.get("success"): + logger.info( + f"Updated log retention for challenge {challenge.pk}" + ) + else: + logger.warning( + f"Failed to update log retention for challenge {challenge.pk}: {result.get('error')}" + ) + except Exception: + logger.exception( + f"Error updating log retention for challenge {challenge.pk}" + ) + + +def update_challenge_log_retention_on_approval(challenge): + """Update CloudWatch log retention when a challenge is approved.""" + update_challenge_log_retention(challenge) + + +def update_challenge_log_retention_on_restart(challenge): + """Update CloudWatch log retention when workers are restarted.""" + update_challenge_log_retention(challenge) + + +def update_challenge_log_retention_on_task_def_registration(challenge): + """Update CloudWatch log retention when task definition is registered.""" + update_challenge_log_retention(challenge) + + +def record_host_retention_consent(challenge_pk, user, consent_notes=None): + """Record host consent for retention policy on a challenge.""" + from django.utils import timezone + + from .models import Challenge + + try: + challenge = Challenge.objects.get(pk=challenge_pk) + + if not is_user_a_host_of_challenge(user, challenge_pk): + return { + "error": "User is not authorized to provide retention consent for this challenge" + } + + challenge.retention_policy_consent = True + challenge.retention_policy_consent_date = timezone.now() + challenge.retention_policy_consent_by = user + if consent_notes: + challenge.retention_policy_notes = consent_notes + challenge.save() + + logger.info( + f"Retention policy consent recorded for challenge {challenge_pk} by user {user.username}" + ) + + return { + "success": True, + "message": f"Retention policy consent recorded for challenge {challenge.title}.", + "consent_date": challenge.retention_policy_consent_date.isoformat(), + "consent_by": user.username, + } + + except Challenge.DoesNotExist: + return {"error": f"Challenge {challenge_pk} does not exist"} + except Exception as e: + logger.exception( + f"Error recording retention consent for challenge {challenge_pk}" + ) + return {"error": str(e)} diff --git a/apps/challenges/migrations/0114_add_log_retention_and_consent_fields.py b/apps/challenges/migrations/0114_add_log_retention_and_consent_fields.py new file mode 100644 index 0000000000..506631710f --- /dev/null +++ b/apps/challenges/migrations/0114_add_log_retention_and_consent_fields.py @@ -0,0 +1,67 @@ +# Generated by Django 2.2.20 on 2025-07-12 20:36 + +from django.conf import settings +from django.db import migrations, models +import django.db.models.deletion + + +class Migration(migrations.Migration): + + dependencies = [ + migrations.swappable_dependency(settings.AUTH_USER_MODEL), + ("challenges", "0113_add_github_branch_field_and_unique_constraint"), + ] + + operations = [ + # Log retention override field (from 0114) + migrations.AddField( + model_name="challenge", + name="log_retention_days_override", + field=models.PositiveIntegerField( + blank=True, + default=None, + help_text="Admin override for CloudWatch log retention period in days (defaults to 30 days when host has consented)", + null=True, + ), + ), + # Retention consent fields (from 0115) + 
migrations.AddField(
+            model_name="challenge",
+            name="retention_policy_consent",
+            field=models.BooleanField(
+                default=False,
+                help_text="Challenge host has consented to allow EvalAI admins to set a 30-day retention policy for this challenge",
+                verbose_name="Retention Policy Consent",
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="retention_policy_consent_by",
+            field=models.ForeignKey(
+                blank=True,
+                help_text="User who provided retention policy consent",
+                null=True,
+                on_delete=django.db.models.deletion.SET_NULL,
+                related_name="retention_consent_challenges",
+                to=settings.AUTH_USER_MODEL,
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="retention_policy_consent_date",
+            field=models.DateTimeField(
+                blank=True,
+                help_text="Date when retention policy consent was given",
+                null=True,
+            ),
+        ),
+        migrations.AddField(
+            model_name="challenge",
+            name="retention_policy_notes",
+            field=models.TextField(
+                blank=True,
+                help_text="Additional notes about retention policy for this challenge",
+                null=True,
+            ),
+        ),
+    ]
diff --git a/apps/challenges/models.py b/apps/challenges/models.py
index 823309d9d1..24eef07fdc 100644
--- a/apps/challenges/models.py
+++ b/apps/challenges/models.py
@@ -134,6 +134,39 @@ def __init__(self, *args, **kwargs):
     sqs_retention_period = models.PositiveIntegerField(
         default=345600, verbose_name="SQS Retention Period"
     )
+    log_retention_days_override = models.PositiveIntegerField(
+        null=True,
+        blank=True,
+        default=None,
+        help_text="Admin override for CloudWatch log retention period in days (defaults to 30 days when host has consented)",
+    )
+
+    # Retention policy consent and configuration
+    retention_policy_consent = models.BooleanField(
+        default=False,
+        help_text="Challenge host has consented to allow EvalAI admins to set a 30-day retention policy for this challenge",
+        verbose_name="Retention Policy Consent",
+    )
+    retention_policy_consent_date = models.DateTimeField(
+        null=True,
+        blank=True,
+        help_text="Date when retention policy consent was given",
+    )
+    retention_policy_consent_by = models.ForeignKey(
+        User,
+        null=True,
+        blank=True,
+        on_delete=models.SET_NULL,
+        related_name="retention_consent_challenges",
+        help_text="User who provided retention policy consent",
+    )
+
+    # Retention policy documentation and notes
+    retention_policy_notes = models.TextField(
+        blank=True,
+        null=True,
+        help_text="Additional notes about retention policy for this challenge",
+    )
     is_docker_based = models.BooleanField(
         default=False, verbose_name="Is Docker Based", db_index=True
     )
diff --git a/apps/challenges/signals.py b/apps/challenges/signals.py
new file mode 100644
index 0000000000..92a1505082
--- /dev/null
+++ b/apps/challenges/signals.py
@@ -0,0 +1,120 @@
+import logging
+
+from django.db.models.signals import post_save, pre_save
+from django.dispatch import receiver
+from jobs.models import Submission
+
+from .aws_utils import calculate_submission_retention_date
+from .models import ChallengePhase
+
+logger = logging.getLogger(__name__)
+
+
+@receiver(pre_save, sender=ChallengePhase)
+def store_original_challenge_phase_values(sender, instance, **kwargs):
+    """Store original values to detect changes in challenge phase"""
+    if instance.pk:
+        try:
+            original = ChallengePhase.objects.get(pk=instance.pk)
+            instance._original_end_date = original.end_date
+            instance._original_is_public = original.is_public
+        except ChallengePhase.DoesNotExist:
+            instance._original_end_date = None
+            instance._original_is_public = None
+    else:
+        instance._original_end_date =
None + instance._original_is_public = None + + +@receiver(post_save, sender=ChallengePhase) +def update_submission_retention_on_phase_change( + sender, instance, created, **kwargs +): + """ + Update submission retention dates when challenge phase end_date or is_public changes. + This ensures retention policies are automatically updated when challenges end or + become non-public. + """ + if created: + # For new phases, set retention dates if applicable + retention_date = calculate_submission_retention_date(instance) + if retention_date: + # Update existing submissions for this phase + submissions_updated = Submission.objects.filter( + challenge_phase=instance, + retention_eligible_date__isnull=True, + is_artifact_deleted=False, + ).update(retention_eligible_date=retention_date) + + if submissions_updated > 0: + logger.info( + f"Set retention date {retention_date} for {submissions_updated} " + f"submissions in new phase {instance.pk}" + ) + return + + # Check if relevant fields changed + end_date_changed = ( + hasattr(instance, "_original_end_date") + and instance._original_end_date != instance.end_date + ) + + is_public_changed = ( + hasattr(instance, "_original_is_public") + and instance._original_is_public != instance.is_public + ) + + if end_date_changed or is_public_changed: + logger.info( + f"Challenge phase {instance.pk} changed - end_date: {end_date_changed}, " + f"is_public: {is_public_changed}. Updating submission retention dates." + ) + + # Calculate new retention date + retention_date = calculate_submission_retention_date(instance) + + if retention_date: + # Update submissions for this phase + submissions_updated = Submission.objects.filter( + challenge_phase=instance, is_artifact_deleted=False + ).update(retention_eligible_date=retention_date) + + logger.info( + f"Updated retention date to {retention_date} for {submissions_updated} " + f"submissions in phase {instance.pk}" + ) + else: + # Clear retention dates if phase is now public or has no end date + submissions_updated = Submission.objects.filter( + challenge_phase=instance, is_artifact_deleted=False + ).update(retention_eligible_date=None) + + if submissions_updated > 0: + logger.info( + f"Cleared retention dates for {submissions_updated} " + f"submissions in phase {instance.pk} (phase is now public or has no end date)" + ) + + +@receiver(pre_save, sender="jobs.Submission") +def set_submission_retention_on_create(sender, instance, **kwargs): + """ + Set initial retention date when a new submission is created. + This uses pre_save to avoid recursive save calls. 
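+
+    Note: pre_save signals are not sent for bulk operations such as
+    Submission.objects.bulk_create() or queryset.update(); submissions that
+    miss this hook are backfilled by the periodic
+    update_submission_retention_dates task.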
+ """ + print(f"SIGNAL FIRED: PRE_SAVE on Submission {instance.pk}") + logger.info( + f"PRE_SAVE signal: instance.pk={instance.pk}, " + f"retention_eligible_date={instance.retention_eligible_date}, " + f"phase.is_public={instance.challenge_phase.is_public}, " + f"phase.end_date={instance.challenge_phase.end_date}" + ) + + # Only act on the first save (when pk is not set) + if not instance.pk and not instance.retention_eligible_date: + retention_date = calculate_submission_retention_date( + instance.challenge_phase + ) + logger.info(f"Calculated retention date: {retention_date}") + if retention_date: + instance.retention_eligible_date = retention_date + logger.info( + f"Set initial retention date {retention_date} for new submission " + f"(phase.is_public={instance.challenge_phase.is_public}, " + f"phase.end_date={instance.challenge_phase.end_date})" + ) + + +print("SUBMISSION RETENTION SIGNAL REGISTERED") diff --git a/apps/challenges/templates/challenges/retention_warning.html b/apps/challenges/templates/challenges/retention_warning.html new file mode 100644 index 0000000000..cf94e21938 --- /dev/null +++ b/apps/challenges/templates/challenges/retention_warning.html @@ -0,0 +1,170 @@ + + + + + + Retention Warning - {{ CHALLENGE_NAME }} + + + +
+
+ {% if CHALLENGE_IMAGE_URL %} + {{ CHALLENGE_NAME }} + {% endif %} +
⚠️
+
Retention Warning
+
{{ CHALLENGE_NAME }}
+
+ +
+

Dear Challenge Host,

+ +

This is an important notification regarding your challenge {{ CHALLENGE_NAME }}.

+ +
+ Action Required: Your submission artifacts will be automatically deleted in {{ DAYS_REMAINING }} days ({{ RETENTION_DATE }}) to comply with our data retention policy. +
+ +
+
+
{{ SUBMISSION_COUNT }}
+
Submissions Affected
+
+
+
{{ DAYS_REMAINING }}
+
Days Remaining
+
+
+ +
+

What will be deleted?

+
    +
  • Submission input files
  • +
  • Output files and results
  • +
  • Log files and metadata
  • +
  • All associated artifacts
  • +
+

Note: Challenge metadata, leaderboard data, and participant information will be preserved.

+
+ +

If you need to retain any of this data, please download it before the deletion date.

+ +
+ View Challenge +
+
+ + +
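Reviewer note on the template above: it is rendered by `send_retention_warning_email` via `send_template_email`, and the context only supplies `CHALLENGE_NAME`, `SUBMISSION_COUNT`, `WARNING_DATE`, and `CHALLENGE_URL` (plus the optional image URL). A minimal sketch of that rendering path, with illustrative placeholder values:

```python
from datetime import timedelta

from django.template.loader import render_to_string
from django.utils import timezone
from django.utils.html import strip_tags

# Illustrative context; in production the weekly task supplies these values.
warning_date = timezone.now() + timedelta(days=14)
context = {
    "CHALLENGE_NAME": "Example Challenge",  # placeholder for challenge.title
    "SUBMISSION_COUNT": 42,                 # placeholder count
    "WARNING_DATE": warning_date.strftime("%Y-%m-%d"),
    "CHALLENGE_URL": "https://eval.ai/web/challenge/1",  # placeholder URL
}

html_content = render_to_string("challenges/retention_warning.html", context)
text_content = strip_tags(html_content)  # plain-text alternative part
```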
diff --git a/apps/challenges/urls.py b/apps/challenges/urls.py
index 3139d5381c..38db134730 100644
--- a/apps/challenges/urls.py
+++ b/apps/challenges/urls.py
@@ -201,6 +201,22 @@
         views.manage_worker,
         name="manage_worker",
     ),
+    # Retention policy consent endpoints
+    url(
+        r"^(?P<challenge_pk>[0-9]+)/retention-consent/$",
+        views.provide_retention_consent,
+        name="provide_retention_consent",
+    ),
+    url(
+        r"^(?P<challenge_pk>[0-9]+)/retention-consent-status/$",
+        views.get_retention_consent_status,
+        name="get_retention_consent_status",
+    ),
+    url(
+        r"^(?P<challenge_pk>[0-9]+)/update-retention-consent/$",
+        views.update_retention_consent,
+        name="update_retention_consent",
+    ),
     url(
         r"^(?P<challenge_pk>[0-9]+)/manage_ec2_instance/(?P<action>[\w-]+)/$",
         views.manage_ec2_instance,
diff --git a/apps/challenges/views.py b/apps/challenges/views.py
index 87409330b9..e562fdf77e 100644
--- a/apps/challenges/views.py
+++ b/apps/challenges/views.py
@@ -5101,3 +5101,284 @@ def modify_leaderboard_data(request):
     # Serialize and return the updated data
     response_data = {"message": "Leaderboard data updated successfully!"}
     return Response(response_data, status=status.HTTP_200_OK)
+
+
+@api_view(["POST"])
+@throttle_classes([UserRateThrottle])
+@permission_classes((permissions.IsAuthenticated, HasVerifiedEmail))
+@authentication_classes((JWTAuthentication, ExpiringTokenAuthentication))
+def provide_retention_consent(request, challenge_pk):
+    """
+    API endpoint for challenge hosts to provide retention policy consent.
+
+    Body parameters:
+    - ``notes``: Optional notes about the consent
+
+    Returns:
+        dict: Success/error response with consent details
+    """
+    from .aws_utils import record_host_retention_consent
+
+    try:
+        challenge = Challenge.objects.get(pk=challenge_pk)
+    except Challenge.DoesNotExist:
+        response_data = {"error": "Challenge does not exist"}
+        return Response(response_data, status=status.HTTP_404_NOT_FOUND)
+
+    # Check if user is a host of this challenge
+    if not is_user_a_host_of_challenge(request.user, challenge_pk):
+        response_data = {
+            "error": "You are not authorized to provide retention consent for this challenge"
+        }
+        return Response(response_data, status=status.HTTP_403_FORBIDDEN)
+
+    # Check if consent already exists
+    if challenge.retention_policy_consent:
+        response_data = {
+            "message": "Retention policy consent already provided",
+            "consent_date": (
+                challenge.retention_policy_consent_date.isoformat()
+                if challenge.retention_policy_consent_date
+                else None
+            ),
+            "consent_by": (
+                challenge.retention_policy_consent_by.username
+                if challenge.retention_policy_consent_by
+                else None
+            ),
+        }
+        return Response(response_data, status=status.HTTP_200_OK)
+
+    # Get optional notes
+    notes = request.data.get("notes", "")
+
+    # Record the consent
+    result = record_host_retention_consent(challenge_pk, request.user, notes)
+
+    if result.get("success"):
+        response_data = {
+            "message": result["message"],
+            "consent_date": result["consent_date"],
+            "consent_by": result["consent_by"],
+            "challenge_id": challenge_pk,
+            "challenge_title": challenge.title,
+        }
+        return Response(response_data, status=status.HTTP_201_CREATED)
+    else:
+        response_data = {
+            "error": result.get("error", "Failed to record consent")
+        }
+        return Response(response_data, status=status.HTTP_400_BAD_REQUEST)
+
+
+@api_view(["GET"])
+@throttle_classes([UserRateThrottle])
+@permission_classes((permissions.IsAuthenticated, HasVerifiedEmail))
+@authentication_classes((JWTAuthentication, ExpiringTokenAuthentication))
+def 
get_retention_consent_status(request, challenge_pk): + """ + API endpoint to get retention policy consent status for a challenge. + + Returns: + dict: Consent status and details + """ + try: + challenge = Challenge.objects.get(pk=challenge_pk) + except Challenge.DoesNotExist: + response_data = {"error": "Challenge does not exist"} + return Response(response_data, status=status.HTTP_404_NOT_FOUND) + + is_host = is_user_a_host_of_challenge(request.user, challenge_pk) + + response_data = { + "challenge_id": challenge_pk, + "challenge_title": challenge.title, + "has_consent": challenge.retention_policy_consent, + "is_host": is_host, + "can_provide_consent": is_host + and not challenge.retention_policy_consent, + } + + if challenge.retention_policy_consent: + response_data.update( + { + "consent_date": ( + challenge.retention_policy_consent_date.isoformat() + if challenge.retention_policy_consent_date + else None + ), + "consent_by": ( + challenge.retention_policy_consent_by.username + if challenge.retention_policy_consent_by + else None + ), + "retention_notes": challenge.retention_policy_notes, + } + ) + + # Add custom retention policy information + if challenge.retention_policy_consent: + response_data.update( + { + "custom_policies": { + "log_retention_days_override": challenge.log_retention_days_override, + } + } + ) + + return Response(response_data, status=status.HTTP_200_OK) + + +@api_view(["GET"]) +@throttle_classes([UserRateThrottle]) +@permission_classes((permissions.IsAuthenticated, HasVerifiedEmail)) +@authentication_classes((JWTAuthentication, ExpiringTokenAuthentication)) +def get_challenge_retention_info(request, challenge_pk): + """ + API endpoint to get comprehensive retention policy information for challenge management. + This is used in the challenge management dashboard. 
+ + Returns: + dict: Complete retention policy information and consent status + """ + try: + challenge = Challenge.objects.get(pk=challenge_pk) + except Challenge.DoesNotExist: + response_data = {"error": "Challenge does not exist"} + return Response(response_data, status=status.HTTP_404_NOT_FOUND) + + is_host = is_user_a_host_of_challenge(request.user, challenge_pk) + + # Get challenge phases for retention calculation + phases = challenge.challengephase_set.all() + latest_end_date = None + if phases.exists(): + latest_end_date = max( + phase.end_date for phase in phases if phase.end_date + ) + + # Calculate default retention periods + from .aws_utils import ( + calculate_retention_period_days, + map_retention_days_to_aws_values, + ) + + default_retention_days = None + if latest_end_date: + default_retention_days = calculate_retention_period_days( + latest_end_date, challenge + ) + default_retention_days = map_retention_days_to_aws_values( + default_retention_days + ) + + response_data = { + "challenge_id": challenge_pk, + "challenge_title": challenge.title, + "retention_policy": { + "has_consent": challenge.retention_policy_consent, + "consent_date": ( + challenge.retention_policy_consent_date.isoformat() + if challenge.retention_policy_consent_date + else None + ), + "consent_by": ( + challenge.retention_policy_consent_by.username + if challenge.retention_policy_consent_by + else None + ), + "notes": challenge.retention_policy_notes, + }, + "user_permissions": { + "is_host": is_host, + "can_provide_consent": is_host + and not challenge.retention_policy_consent, + "can_manage_retention": is_host + and challenge.retention_policy_consent, + }, + "current_policies": { + "log_retention_days_override": challenge.log_retention_days_override, + }, + "calculated_retention": { + "default_retention_days": default_retention_days, + "latest_phase_end_date": ( + latest_end_date.isoformat() if latest_end_date else None + ), + }, + "policy_descriptions": { + "log_retention": "CloudWatch log retention period in days for the entire challenge", + }, + } + + return Response(response_data, status=status.HTTP_200_OK) + + +@api_view(["POST"]) +@throttle_classes([UserRateThrottle]) +@permission_classes((permissions.IsAuthenticated, HasVerifiedEmail)) +@authentication_classes((JWTAuthentication, ExpiringTokenAuthentication)) +def update_retention_consent(request, challenge_pk): + """ + API endpoint to update retention policy consent status. + This is called from the challenge management interface with a simple checkbox. 
+ + Query Parameters: + - ``consent``: Boolean indicating consent status (required) + - ``notes``: Optional notes about the consent + + Returns: + dict: Success/error response + """ + from .aws_utils import record_host_retention_consent + + try: + challenge = Challenge.objects.get(pk=challenge_pk) + except Challenge.DoesNotExist: + response_data = {"error": "Challenge does not exist"} + return Response(response_data, status=status.HTTP_404_NOT_FOUND) + + # Check if user is a host of this challenge + if not is_user_a_host_of_challenge(request.user, challenge_pk): + response_data = { + "error": "You are not authorized to update retention consent for this challenge" + } + return Response(response_data, status=status.HTTP_403_FORBIDDEN) + + consent = request.data.get("consent") + notes = request.data.get("notes", "") + + if consent is None: + response_data = {"error": "Consent status is required"} + return Response(response_data, status=status.HTTP_400_BAD_REQUEST) + + if consent: + # Record consent + result = record_host_retention_consent( + challenge_pk, request.user, notes + ) + if result.get("success"): + response_data = { + "message": "Retention policy consent recorded successfully", + "consent_date": result["consent_date"], + "consent_by": result["consent_by"], + } + return Response(response_data, status=status.HTTP_200_OK) + else: + response_data = { + "error": result.get("error", "Failed to record consent") + } + return Response(response_data, status=status.HTTP_400_BAD_REQUEST) + else: + # Remove consent (if needed for compliance) + challenge.retention_policy_consent = False + challenge.retention_policy_consent_date = None + challenge.retention_policy_consent_by = None + challenge.retention_policy_notes = notes if notes else None + challenge.save() + + response_data = { + "message": "Retention policy consent removed", + "consent_date": None, + "consent_by": None, + } + return Response(response_data, status=status.HTTP_200_OK) diff --git a/apps/jobs/apps.py b/apps/jobs/apps.py index d909e6b260..873106c653 100644 --- a/apps/jobs/apps.py +++ b/apps/jobs/apps.py @@ -5,3 +5,6 @@ class JobsConfig(AppConfig): name = "jobs" + + def ready(self): + import jobs.signals # noqa diff --git a/apps/jobs/migrations/0027_add_retention_policy_fields.py b/apps/jobs/migrations/0027_add_retention_policy_fields.py new file mode 100644 index 0000000000..d1c20a3fee --- /dev/null +++ b/apps/jobs/migrations/0027_add_retention_policy_fields.py @@ -0,0 +1,41 @@ +# Generated by Django 2.2.20 on 2025-07-04 06:58 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("jobs", "0026_auto_20230804_1946"), + ] + + operations = [ + migrations.AddField( + model_name="submission", + name="artifact_deletion_date", + field=models.DateTimeField( + blank=True, + help_text="Timestamp when submission artifacts were deleted", + null=True, + ), + ), + migrations.AddField( + model_name="submission", + name="is_artifact_deleted", + field=models.BooleanField( + db_index=True, + default=False, + help_text="Flag indicating whether submission artifacts have been deleted", + ), + ), + migrations.AddField( + model_name="submission", + name="retention_eligible_date", + field=models.DateTimeField( + blank=True, + db_index=True, + help_text="Date when submission artifacts become eligible for deletion", + null=True, + ), + ), + ] diff --git a/apps/jobs/migrations/0028_auto_20250712_2207.py b/apps/jobs/migrations/0028_auto_20250712_2207.py new file mode 100644 index 
0000000000..a679a06886 --- /dev/null +++ b/apps/jobs/migrations/0028_auto_20250712_2207.py @@ -0,0 +1,32 @@ +# Generated by Django 2.2.20 on 2025-07-12 22:07 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("jobs", "0027_add_retention_policy_fields"), + ] + + operations = [ + migrations.AddField( + model_name="submission", + name="retention_override_reason", + field=models.TextField( + blank=True, + help_text="Reason for any retention policy override applied to this submission", + null=True, + ), + ), + migrations.AddField( + model_name="submission", + name="retention_policy_applied", + field=models.CharField( + blank=True, + help_text="Description of retention policy applied to this submission", + max_length=100, + null=True, + ), + ), + ] diff --git a/apps/jobs/models.py b/apps/jobs/models.py index 4678fe460a..91d828d414 100644 --- a/apps/jobs/models.py +++ b/apps/jobs/models.py @@ -143,6 +143,37 @@ class Submission(TimeStampedModel): submission_metadata = JSONField(blank=True, null=True) is_verified_by_host = models.BooleanField(default=False) + # Retention policy fields + retention_eligible_date = models.DateTimeField( + null=True, + blank=True, + help_text="Date when submission artifacts become eligible for deletion", + db_index=True, + ) + is_artifact_deleted = models.BooleanField( + default=False, + help_text="Flag indicating whether submission artifacts have been deleted", + db_index=True, + ) + artifact_deletion_date = models.DateTimeField( + null=True, + blank=True, + help_text="Timestamp when submission artifacts were deleted", + ) + + # Submission type and retention policy tracking + retention_policy_applied = models.CharField( + max_length=100, + blank=True, + null=True, + help_text="Description of retention policy applied to this submission", + ) + retention_override_reason = models.TextField( + blank=True, + null=True, + help_text="Reason for any retention policy override applied to this submission", + ) + def __str__(self): return "{}".format(self.id) diff --git a/celerybeat.pid b/celerybeat.pid new file mode 100644 index 0000000000..45a4fb75db --- /dev/null +++ b/celerybeat.pid @@ -0,0 +1 @@ +8 diff --git a/docker-compose-production.yml b/docker-compose-production.yml index 506ce0c724..47ea1dd637 100644 --- a/docker-compose-production.yml +++ b/docker-compose-production.yml @@ -37,6 +37,23 @@ services: awslogs-group: celery_production awslogs-create-group: "true" + celerybeat: + image: ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/evalai-production-celery:${COMMIT_ID} + env_file: + - docker/prod/docker_production.env + build: + context: ./ + dockerfile: docker/prod/celery/Dockerfile + command: ["celery", "-A", "evalai", "beat", "-l", "INFO", "-s", "/tmp/celerybeat-schedule"] + depends_on: + - django + logging: + driver: awslogs + options: + awslogs-region: ${AWS_DEFAULT_REGION} + awslogs-group: celerybeat_production + awslogs-create-group: "true" + worker_py3_7: image: ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/evalai-production-worker-py3.7:${COMMIT_ID} build: diff --git a/docker-compose-staging.yml b/docker-compose-staging.yml index 4e75a5c260..7d893dce11 100644 --- a/docker-compose-staging.yml +++ b/docker-compose-staging.yml @@ -37,6 +37,23 @@ services: awslogs-group: celery_staging awslogs-create-group: 'true' + celerybeat: + image: ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/evalai-staging-celery:${COMMIT_ID} + env_file: + - docker/prod/docker_staging.env + build: + context: ./ + 
dockerfile: docker/prod/celery/Dockerfile + command: ["celery", "-A", "evalai", "beat", "-l", "INFO", "-s", "/tmp/celerybeat-schedule"] + depends_on: + - django + logging: + driver: awslogs + options: + awslogs-region: us-east-1 + awslogs-group: celerybeat_staging + awslogs-create-group: 'true' + worker_py3_7: image: ${AWS_ACCOUNT_ID}.dkr.ecr.us-east-1.amazonaws.com/evalai-staging-worker-py3.7:${COMMIT_ID} build: diff --git a/evalai/celery.py b/evalai/celery.py index 1937a5d9a4..8dd232f3ee 100644 --- a/evalai/celery.py +++ b/evalai/celery.py @@ -11,7 +11,6 @@ else: app.conf.task_default_queue = os.environ.get("CELERY_QUEUE_NAME") -app.config_from_object("django.conf:settings") app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) if __name__ == "__main__": diff --git a/frontend/src/js/controllers/challengeCtrl.js b/frontend/src/js/controllers/challengeCtrl.js index 57ab960200..a836e88fde 100644 --- a/frontend/src/js/controllers/challengeCtrl.js +++ b/frontend/src/js/controllers/challengeCtrl.js @@ -3122,6 +3122,130 @@ } }; + // Retention consent logic for challenge hosts + vm.retentionConsentChecked = false; + vm.retentionConsentInfo = null; + vm.retentionConsentLoading = true; + vm.retentionConsentError = null; + vm.retentionConsentSaving = false; + + // Fetch retention consent status + vm.fetchRetentionConsentStatus = function() { + vm.retentionConsentLoading = true; + vm.retentionConsentError = null; + var parameters = { + url: 'challenges/' + vm.challengeId + '/retention-consent-status/', + method: 'GET', + token: userKey, + data: {}, + callback: { + onSuccess: function(response) { + var data = response.data; + vm.retentionConsentChecked = !!data.has_consent; + vm.retentionConsentInfo = { + consent_by: data.consent_by, + consent_date: data.consent_date, + notes: data.retention_notes + }; + vm.retentionConsentLoading = false; + }, + onError: function(response) { + vm.retentionConsentError = response.data && response.data.error ? response.data.error : 'Failed to load retention consent status.'; + vm.retentionConsentLoading = false; + } + } + }; + utilities.sendRequest(parameters); + }; + + // Call on init if host + $scope.$watch(function() { return vm.isChallengeHost }, function(newVal) { + if (newVal) { + vm.fetchRetentionConsentStatus(); + } + }); + + // Also call on initial load if already a host + if (vm.isChallengeHost) { + vm.fetchRetentionConsentStatus(); + } + + // Toggle retention consent with template dialog + vm.toggleRetentionConsent = function(ev) { + // Prevent action if loading + if (vm.retentionConsentLoading) { + return; + } + + // Determine consent state and show appropriate dialog + var consentState = vm.retentionConsentChecked ? 'withdraw' : 'provide'; + var dialogTitle, dialogContent, okText; + + if (consentState === 'provide') { + dialogTitle = 'Provide Retention Policy Consent'; + dialogContent = 'By providing consent, you allow EvalAI admins to set a 30-day retention policy for this challenge. This means submission files, logs, and evaluation outputs may be deleted after 30 days to reduce storage costs. Without consent, data is retained longer for safety.'; + okText = 'Yes, I consent'; + } else { + dialogTitle = 'Withdraw Retention Policy Consent'; + dialogContent = 'By withdrawing consent, EvalAI will retain your challenge data longer for safety and compliance. 
You may provide consent again at any time.'; + okText = 'Yes, withdraw consent'; + } + + var confirm = $mdDialog.confirm() + .title(dialogTitle) + .textContent(dialogContent) + .ariaLabel('Retention Policy Consent') + .targetEvent(ev) + .ok(okText) + .cancel('Cancel'); + + $mdDialog.show(confirm).then(function () { + // User clicked "Yes" - toggle consent status + vm.retentionConsentChecked = !vm.retentionConsentChecked; + actuallyToggleRetentionConsent(); + }, function() { + // User clicked "Cancel" - do nothing + }); + }; + + function actuallyToggleRetentionConsent() { + vm.retentionConsentSaving = true; + vm.retentionConsentError = null; + + // Determine success message + var consentState = vm.retentionConsentChecked ? 'provided' : 'withdrawn'; + + var parameters = { + url: 'challenges/' + vm.challengeId + '/update-retention-consent/', + method: 'POST', + token: userKey, + data: { consent: vm.retentionConsentChecked }, + callback: { + onSuccess: function(response) { + if (vm.retentionConsentChecked) { + vm.retentionConsentInfo = { + consent_by: response.data.consent_by, + consent_date: response.data.consent_date, + notes: response.data.notes + }; + } else { + vm.retentionConsentInfo = null; + } + vm.retentionConsentSaving = false; + $rootScope.notify('success', 'Retention policy consent ' + consentState + ' successfully'); + }, + onError: function(response) { + vm.retentionConsentError = response.data && response.data.error ? response.data.error : 'Failed to update retention consent.'; + // Revert checkbox + vm.retentionConsentChecked = !vm.retentionConsentChecked; + vm.retentionConsentSaving = false; + $rootScope.notify('error', vm.retentionConsentError); + } + } + }; + utilities.sendRequest(parameters); + } + } })(); diff --git a/frontend/src/views/web/challenge/challenge-page.html b/frontend/src/views/web/challenge/challenge-page.html index 546cfb4918..ab17540c0e 100644 --- a/frontend/src/views/web/challenge/challenge-page.html +++ b/frontend/src/views/web/challenge/challenge-page.html @@ -114,6 +114,30 @@

{{challenge.page.title}}
Toggle Participation
+
+                <!-- Retention policy consent toggle (challenge hosts only) -->
+                <div class="retention-consent-holder" ng-if="challenge.isChallengeHost">
+                    <md-checkbox ng-checked="challenge.retentionConsentChecked"
+                                 ng-disabled="challenge.retentionConsentLoading || challenge.retentionConsentSaving"
+                                 ng-click="challenge.toggleRetentionConsent($event)"
+                                 aria-label="Retention Policy Consent">
+                        <span ng-if="challenge.retentionConsentLoading">Loading...</span>
+                        <span ng-if="!challenge.retentionConsentLoading && challenge.retentionConsentChecked">Retention Consent: Active</span>
+                        <span ng-if="!challenge.retentionConsentLoading && !challenge.retentionConsentChecked">Retention Consent: Not Provided</span>
+                    </md-checkbox>
+                </div>
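The markup above drives the consent endpoints added in `apps/challenges/urls.py`. A hedged sketch of the same round-trip outside the UI (base URL and token are placeholders, and the exact prefix depends on how `challenges.urls` is mounted in this deployment):

```python
import requests

API = "https://eval.ai/api"  # placeholder base URL
HEADERS = {"Authorization": "Token <auth-token>"}  # placeholder token
challenge_pk = 123  # hypothetical challenge

# Read the current consent state (what the checkbox label reflects).
state = requests.get(
    f"{API}/challenges/{challenge_pk}/retention-consent-status/",
    headers=HEADERS,
).json()

# Flip it, mirroring actuallyToggleRetentionConsent() in the controller.
requests.post(
    f"{API}/challenges/{challenge_pk}/update-retention-consent/",
    headers=HEADERS,
    json={"consent": not state["has_consent"], "notes": "toggled via API"},
)
```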
diff --git a/frontend/src/views/web/challenge/manage.html b/frontend/src/views/web/challenge/manage.html
index fe3ecd5f8b..0bca4af5fe 100644
--- a/frontend/src/views/web/challenge/manage.html
+++ b/frontend/src/views/web/challenge/manage.html
@@ -71,4 +71,4 @@

Remote Evaluation Meta
-
+
\ No newline at end of file
diff --git a/frontend/tests/controllers-test/challengeCtrl.test.js b/frontend/tests/controllers-test/challengeCtrl.test.js
index 1ebfa38326..b62d773fec 100644
--- a/frontend/tests/controllers-test/challengeCtrl.test.js
+++ b/frontend/tests/controllers-test/challengeCtrl.test.js
@@ -2714,4 +2714,38 @@ describe('Unit tests for challenge controller', function () {
             expect($mdDialogOpened).toBe(true);
         });
     });
+
+    describe('Retention Consent Toggle', function () {
+        var $mdDialog, $rootScope, $controller, $scope, vm, $q;
+        beforeEach(inject(function (_$mdDialog_, _$rootScope_, _$controller_, _$q_) {
+            $mdDialog = _$mdDialog_;
+            $rootScope = _$rootScope_;
+            $q = _$q_;
+            $scope = $rootScope.$new();
+            vm = _$controller_('ChallengeCtrl', { $scope: $scope });
+        }));
+
+        it('should open a dialog when retention consent toggle is clicked', function () {
+            spyOn($mdDialog, 'show').and.callFake(function () {
+                return $q.defer().promise;
+            });
+            // Set required properties to ensure the function doesn't return early
+            vm.retentionConsentChecked = true;
+            vm.retentionConsentLoading = false; // must be false for the dialog to open
+            vm.challengeId = 123; // set a challenge ID
+            vm.toggleRetentionConsent({});
+            expect($mdDialog.show).toHaveBeenCalled();
+        });
+
+        it('should not open a dialog when retention consent is loading', function () {
+            spyOn($mdDialog, 'show').and.callFake(function () {
+                return $q.defer().promise;
+            });
+            vm.retentionConsentChecked = true;
+            vm.retentionConsentLoading = true; // prevents the dialog from showing
+            vm.toggleRetentionConsent({});
+            expect($mdDialog.show).not.toHaveBeenCalled();
+        });
+    });
 });
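One thing reviewers may want to double-check alongside the deployment changes below: the new `celerybeat` container only executes what ends up in the Celery app's beat schedule, and this PR also removes `app.config_from_object("django.conf:settings")` from `evalai/celery.py`, so the `CELERY_BEAT_SCHEDULE` dict added to `settings/common.py` may never be loaded. A sketch of the conventional namespaced wiring, shown for comparison only (an assumption about intent, not what this PR ships):

```python
# evalai/celery.py -- conventional wiring, for comparison
import os

from celery import Celery

os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.common")

app = Celery("evalai")
# With namespace="CELERY", Django settings such as CELERY_BEAT_SCHEDULE and
# CELERY_TIMEZONE are mapped onto celery's beat_schedule / timezone options.
app.config_from_object("django.conf:settings", namespace="CELERY")
app.autodiscover_tasks()
```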
diff --git a/scripts/deployment/deploy.sh b/scripts/deployment/deploy.sh
index 93688c1dcc..5ee5c0690f 100755
--- a/scripts/deployment/deploy.sh
+++ b/scripts/deployment/deploy.sh
@@ -49,8 +49,8 @@ case $opt in
        eval $(aws ecr get-login --no-include-email)
        aws s3 cp s3://cloudcv-secrets/evalai/${env}/docker_${env}.env ./docker/prod/docker_${env}.env
        docker-compose -f docker-compose-${env}.yml rm -s -v -f
-       docker-compose -f docker-compose-${env}.yml pull django nodejs celery node_exporter memcached
-       docker-compose -f docker-compose-${env}.yml up -d --force-recreate --remove-orphans django nodejs celery node_exporter memcached
+       docker-compose -f docker-compose-${env}.yml pull django nodejs celery celerybeat node_exporter memcached
+       docker-compose -f docker-compose-${env}.yml up -d --force-recreate --remove-orphans django nodejs celery celerybeat node_exporter memcached
ENDSSH2
ENDSSH
    ;;
@@ -98,8 +98,8 @@ case $opt in
        echo "Completed deploy operation."
        ;;
    deploy-celery)
-        echo "Deploying celery docker container..."
-        docker-compose -f docker-compose-${env}.yml up -d celery
+        echo "Deploying celery worker and beat docker containers..."
+        docker-compose -f docker-compose-${env}.yml up -d celery celerybeat
        echo "Completed deploy operation."
        ;;
    deploy-worker)
@@ -234,7 +234,7 @@ case $opt in
        echo "          Eg. ./scripts/deployment/deploy.sh deploy-nodejs production"
        echo "      deploy-nodejs-v2 : Deploy new frontend container in the respective environment."
        echo "          Eg. ./scripts/deployment/deploy.sh deploy-nodejs-v2 production"
-        echo "      deploy-celery : Deploy celery containers in the respective environment."
+        echo "      deploy-celery : Deploy celery worker and beat containers in the respective environment."
        echo "          Eg. ./scripts/deployment/deploy.sh deploy-celery production"
        echo "      deploy-worker : Deploy worker container for a challenge using challenge pk."
        echo "          Eg. ./scripts/deployment/deploy.sh deploy-worker production <challenge_pk>"
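The management script below wraps helpers that can also be driven directly from a Django shell; a short sketch (the challenge pk and username are placeholders):

```python
# docker-compose exec django python manage.py shell
from challenges.aws_utils import (
    record_host_retention_consent,
    set_cloudwatch_log_retention,
)
from django.contrib.auth import get_user_model

host = get_user_model().objects.get(username="host_user")  # placeholder user
print(record_host_retention_consent(1, host))  # placeholder challenge pk

# Once consent is recorded, apply a 30-day CloudWatch log retention policy.
print(set_cloudwatch_log_retention(1, retention_days=30))
```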
diff --git a/scripts/manage_retention.py b/scripts/manage_retention.py
new file mode 100644
index 0000000000..19f7eef932
--- /dev/null
+++ b/scripts/manage_retention.py
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+"""
+Simplified retention management script.
+
+Usage:
+    docker-compose exec django python scripts/manage_retention.py cleanup [--dry-run]
+    docker-compose exec django python scripts/manage_retention.py status [--challenge-id <challenge_id>]
+    docker-compose exec django python scripts/manage_retention.py set-retention <challenge_id> [--days <days>]
+    docker-compose exec django python scripts/manage_retention.py consent <challenge_id> <username>
+"""
+
+import os
+import sys
+
+import django
+
+sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)) + "/../")
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings.common")
+
+django.setup()
+
+# These imports touch Django models, so they must run after django.setup().
+from challenges.aws_utils import (  # noqa: E402
+    cleanup_expired_submission_artifacts,
+    record_host_retention_consent,
+    set_cloudwatch_log_retention,
+)
+from challenges.models import Challenge  # noqa: E402
+from django.contrib.auth import get_user_model  # noqa: E402
+from django.utils import timezone  # noqa: E402
+from jobs.models import Submission  # noqa: E402
+
+
+def cleanup(dry_run=False):
+    """Clean up expired submission artifacts."""
+    if dry_run:
+        print("DRY RUN: Would clean up expired submissions")
+        return
+    result = cleanup_expired_submission_artifacts.delay()
+    print(f"Cleanup task started: {result.id}")
+
+
+def status(challenge_id=None):
+    """Show retention status."""
+    if challenge_id:
+        try:
+            challenge = Challenge.objects.get(id=challenge_id)
+            print(f"\nChallenge: {challenge.title} (ID: {challenge.pk})")
+            print(
+                f"Consent: {'Yes' if challenge.retention_policy_consent else 'No'}"
+            )
+            if challenge.retention_policy_consent:
+                print(
+                    f"Consent by: {challenge.retention_policy_consent_by.username if challenge.retention_policy_consent_by else 'Unknown'}"
+                )
+                print(
+                    f"Consent date: {challenge.retention_policy_consent_date}"
+                )
+            submissions = Submission.objects.filter(
+                challenge_phase__challenge=challenge
+            )
+            eligible = submissions.filter(
+                retention_eligible_date__lte=timezone.now(),
+                is_artifact_deleted=False,
+            )
+            print(
+                f"Submissions: {submissions.count()} total, {eligible.count()} eligible for cleanup"
+            )
+        except Challenge.DoesNotExist:
+            print(f"Challenge {challenge_id} not found")
+    else:
+        challenges = Challenge.objects.all()
+        consented = challenges.filter(retention_policy_consent=True).count()
+        total_submissions = Submission.objects.count()
+        eligible_submissions = Submission.objects.filter(
+            retention_eligible_date__lte=timezone.now(),
+            is_artifact_deleted=False,
+        ).count()
+        print("\nOverall Status:")
+        print(f"Challenges with consent: {consented}/{challenges.count()}")
+        print(f"Total submissions: {total_submissions}")
+        print(f"Eligible for cleanup: {eligible_submissions}")
+
+
+def set_retention(challenge_id, days=None):
+    """Set log retention for a challenge."""
+    try:
+        result = set_cloudwatch_log_retention(challenge_id, days)
+        if result.get("success"):
+            print(f"Success: Retention set to {result['retention_days']} days")
+        else:
+            print(f"Error: {result.get('error')}")
+    except Exception as e:
+        print(f"Error: {e}")
+
+
+def consent(challenge_id, username):
+    """Record consent for a challenge."""
+    try:
+        user = get_user_model().objects.get(username=username)
+        result = record_host_retention_consent(challenge_id, user)
+        if result.get("success"):
+            print("Consent recorded successfully")
+        else:
+            print(f"Error: {result.get('error')}")
+    except get_user_model().DoesNotExist:
+        print(f"User {username} not found")
+    except Exception as e:
+        print(f"Error: {e}")
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(__doc__)
+        return
+    action = sys.argv[1]
+    if action == "cleanup":
+        dry_run = "--dry-run" in sys.argv
+        cleanup(dry_run)
+    elif action == "status":
+        challenge_id = None
+        if "--challenge-id" in sys.argv:
+            idx = sys.argv.index("--challenge-id")
+            if idx + 1 < len(sys.argv):
+                challenge_id = int(sys.argv[idx + 1])
+        status(challenge_id)
+    elif action == "set-retention":
+        if len(sys.argv) < 3:
+            print("Usage: set-retention <challenge_id> [--days <days>]")
+            return
+        challenge_id = int(sys.argv[2])
+        days = None
+        if "--days" in sys.argv:
+            idx = sys.argv.index("--days")
+            if idx + 1 < len(sys.argv):
+                days = int(sys.argv[idx + 1])
+        set_retention(challenge_id, days)
+    elif action == "consent":
+        if len(sys.argv) < 4:
+            print("Usage: consent <challenge_id> <username>")
+            return
+        challenge_id = int(sys.argv[2])
+        username = sys.argv[3]
+        consent(challenge_id, username)
+    else:
+        print(f"Unknown action: {action}")
+        print(__doc__)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/settings/common.py b/settings/common.py
index 3aa982e58d..819efad4a0 100755
--- a/settings/common.py
+++ b/settings/common.py
@@ -15,6 +15,8 @@
 import sys
 from datetime import timedelta
 
+from celery.schedules import crontab
+
 # Build paths inside the project like this: os.path.join(BASE_DIR, ...)
 BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 APPS_DIR = os.path.join(BASE_DIR, "apps")
@@ -212,6 +214,32 @@
 # Broker url for celery
 CELERY_BROKER_URL = "sqs://%s:%s@" % (AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY)
 
+# Celery Beat Schedule for Periodic Tasks
+CELERY_BEAT_SCHEDULE = {
+    "cleanup-expired-submission-artifacts": {
+        "task": "challenges.aws_utils.cleanup_expired_submission_artifacts",
+        "schedule": crontab(
+            hour=2, minute=0, day_of_month=1
+        ),  # Monthly on the 1st at 2 AM UTC
+    },
+    "weekly-retention-notifications-and-consent-log": {
+        "task": "challenges.aws_utils.weekly_retention_notifications_and_consent_log",
+        "schedule": crontab(
+            hour=10, minute=0, day_of_week=1
+        ),  # Weekly on Mondays at 10 AM UTC
+    },
+    "update-submission-retention-dates": {
+        "task": "challenges.aws_utils.update_submission_retention_dates",
+        "schedule": crontab(
+            hour=1, minute=0, day_of_week=0
+        ),  # Weekly on Sundays at 1 AM UTC
+    },
+}
+
+# Celery timezone configuration
+CELERY_TIMEZONE = "UTC"
+CELERY_ENABLE_UTC = True
+
 # CORS Settings
 CORS_ORIGIN_ALLOW_ALL = True
@@ -275,6 +303,16 @@
             "level": "ERROR",
             "propagate": False,
         },
+        "challenges.aws_utils": {
+            "handlers": ["console", "logfile"],
+            "level": "INFO",
+            "propagate": False,
+        },
+        "celery": {
+            "handlers": ["console", "logfile"],
+            "level": "INFO",
+            "propagate": False,
+        },
     },
 }
diff --git a/tests/unit/challenges/test_aws_utils.py b/tests/unit/challenges/test_aws_utils.py
index 67940e5d1c..44522667bd 100644
--- a/tests/unit/challenges/test_aws_utils.py
+++ b/tests/unit/challenges/test_aws_utils.py
@@ -1,8 +1,9 @@
-import unittest
+from datetime import timedelta
 from http import HTTPStatus
-from unittest import TestCase, mock
-from unittest.mock import MagicMock, mock_open, patch
+from unittest import mock
+from unittest.mock import MagicMock, Mock, mock_open, patch
 
+import django
 import pytest
 from botocore.exceptions import ClientError
 from
challenges.aws_utils import ( @@ -21,6 +22,7 @@ scale_resources, scale_workers, service_manager, + set_cloudwatch_log_retention, setup_ec2, setup_eks_cluster, start_ec2_instance, @@ -32,10 +34,17 @@ update_sqs_retention_period, update_sqs_retention_period_task, ) -from challenges.models import Challenge +from challenges.models import Challenge, ChallengePhase from django.contrib.auth.models import User from django.core import serializers +from django.test import TestCase +from django.utils import timezone from hosts.models import ChallengeHostTeam +from jobs.models import Submission +from participants.models import ParticipantTeam + +# Note: This file uses unittest.TestCase for most tests, but django.test.TestCase for tests that require database operations. +# Classes with django.test.TestCase are explicitly commented to indicate they need database access. class AWSUtilsTestCase(TestCase): @@ -136,16 +145,43 @@ def test_create_service_success( ): mock_challenge.workers = None mock_challenge.task_def_arn = "valid_task_def_arn" + mock_challenge.queue = "test_queue" response_metadata = {"HTTPStatusCode": HTTPStatus.OK} mock_client.create_service.return_value = { "ResponseMetadata": response_metadata } - with patch( + with mock.patch( "challenges.aws_utils.register_task_def_by_challenge_pk", return_value={"ResponseMetadata": response_metadata}, - ): + ), mock.patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "serviceName": "test_queue_service", + "taskDefinition": "valid_task_def_arn", + "desiredCount": 1, + "clientToken": "dummy_client_token", + "launchType": "FARGATE", + "platformVersion": "LATEST", + "networkConfiguration": { + "awsvpcConfiguration": { + "subnets": ["subnet-1", "subnet-2"], + "securityGroups": ["sg-1"], + "assignPublicIp": "ENABLED", + } + }, + "schedulingStrategy": "REPLICA", + "deploymentController": {"type": "ECS"}, + "deploymentConfiguration": { + "deploymentCircuitBreaker": { + "enable": True, + "rollback": False, + } + }, + } + response = create_service_by_challenge_pk( mock_client, mock_challenge, client_token ) @@ -159,6 +195,7 @@ def test_create_service_client_error( ): mock_challenge.workers = None mock_challenge.task_def_arn = "valid_task_def_arn" + mock_challenge.queue = "test_queue" mock_client.create_service.side_effect = ClientError( error_response={ @@ -173,7 +210,33 @@ def test_create_service_client_error( return_value={ "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK} }, - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "serviceName": "test_queue_service", + "taskDefinition": "valid_task_def_arn", + "desiredCount": 1, + "clientToken": "dummy_client_token", + "launchType": "FARGATE", + "platformVersion": "LATEST", + "networkConfiguration": { + "awsvpcConfiguration": { + "subnets": ["subnet-1", "subnet-2"], + "securityGroups": ["sg-1"], + "assignPublicIp": "ENABLED", + } + }, + "schedulingStrategy": "REPLICA", + "deploymentController": {"type": "ECS"}, + "deploymentConfiguration": { + "deploymentCircuitBreaker": { + "enable": True, + "rollback": False, + } + }, + } + response = create_service_by_challenge_pk( mock_client, mock_challenge, client_token ) @@ -187,6 +250,7 @@ def test_service_already_exists( self, mock_client, mock_challenge, client_token ): mock_challenge.workers = 1 + 
mock_challenge.queue = "test_queue" response = create_service_by_challenge_pk( mock_client, mock_challenge, client_token @@ -205,6 +269,7 @@ def test_register_task_def_fails( mock_challenge.task_def_arn = ( None # Simulate task definition is not yet registered ) + mock_challenge.queue = "test_queue" register_task_response = { "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST} @@ -288,14 +353,26 @@ def test_delete_service_success_when_workers_zero(mock_challenge, mock_client): mock_challenge.task_def_arn = ( "valid_task_def_arn" # Ensure task_def_arn is set to a valid string ) + mock_challenge.queue = "test_queue" response_metadata_ok = { "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK} } with patch( "challenges.aws_utils.get_boto3_client", return_value=mock_client - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "service": "test_queue_service", + "force": True, + } + mock_client.delete_service.return_value = response_metadata_ok + # Mock the deregister_task_definition call to return success + mock_client.deregister_task_definition.return_value = ( + response_metadata_ok + ) response = delete_service_by_challenge_pk(mock_challenge) @@ -311,18 +388,30 @@ def test_delete_service_success_when_workers_not_zero( ): mock_challenge.workers = 3 mock_challenge.task_def_arn = "valid_task_def_arn" + mock_challenge.queue = "test_queue" response_metadata_ok = { "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK} } with patch( "challenges.aws_utils.get_boto3_client", return_value=mock_client - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "service": "test_queue_service", + "force": True, + } + with patch( "challenges.aws_utils.update_service_by_challenge_pk", return_value=response_metadata_ok, ): mock_client.delete_service.return_value = response_metadata_ok + # Mock the deregister_task_definition call to return success + mock_client.deregister_task_definition.return_value = ( + response_metadata_ok + ) response = delete_service_by_challenge_pk(mock_challenge) @@ -335,13 +424,22 @@ def test_delete_service_success_when_workers_not_zero( def test_update_service_failure(mock_challenge, mock_client): mock_challenge.workers = 3 + mock_challenge.task_def_arn = "valid_task_def_arn" + mock_challenge.queue = "test_queue" response_metadata_error = { "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST} } with patch( "challenges.aws_utils.get_boto3_client", return_value=mock_client - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "service": "test_queue_service", + "force": True, + } + with patch( "challenges.aws_utils.update_service_by_challenge_pk", return_value=response_metadata_error, @@ -353,17 +451,28 @@ def test_update_service_failure(mock_challenge, mock_client): == HTTPStatus.BAD_REQUEST ) mock_client.delete_service.assert_not_called() + # deregister_task_definition should not be called when update_service fails + mock_client.deregister_task_definition.assert_not_called() def test_delete_service_failure(mock_challenge, mock_client): mock_challenge.workers = 0 + mock_challenge.task_def_arn = "valid_task_def_arn" + mock_challenge.queue = "test_queue" response_metadata_error = { 
"ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST} } with patch( "challenges.aws_utils.get_boto3_client", return_value=mock_client - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "service": "test_queue_service", + "force": True, + } + mock_client.delete_service.return_value = response_metadata_error response = delete_service_by_challenge_pk(mock_challenge) @@ -373,17 +482,27 @@ def test_delete_service_failure(mock_challenge, mock_client): == HTTPStatus.BAD_REQUEST ) mock_challenge.save.assert_not_called() + # deregister_task_definition should not be called when delete_service fails + mock_client.deregister_task_definition.assert_not_called() def test_deregister_task_definition_failure(mock_challenge, mock_client): mock_challenge.workers = 0 + mock_challenge.queue = "test_queue" response_metadata_ok = { "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.OK} } with patch( "challenges.aws_utils.get_boto3_client", return_value=mock_client - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "service": "test_queue_service", + "force": True, + } + mock_client.delete_service.return_value = response_metadata_ok mock_client.deregister_task_definition.side_effect = ClientError( error_response={ @@ -406,10 +525,18 @@ def test_deregister_task_definition_failure(mock_challenge, mock_client): def test_delete_service_client_error(mock_challenge, mock_client): mock_challenge.workers = 0 + mock_challenge.queue = "test_queue" with patch( "challenges.aws_utils.get_boto3_client", return_value=mock_client - ): + ), patch("json.loads") as mock_json_loads: + # Mock json.loads to return a valid dict instead of parsing the template + mock_json_loads.return_value = { + "cluster": "cluster", + "service": "test_queue_service", + "force": True, + } + mock_client.delete_service.side_effect = ClientError( error_response={ "Error": {"Code": "DeleteServiceError"}, @@ -496,7 +623,7 @@ def test_service_manager_creates_service( ) -class TestStopEc2Instance(unittest.TestCase): +class TestStopEc2Instance(TestCase): @patch("challenges.aws_utils.get_boto3_client") def test_stop_instance_success(self, mock_get_boto3_client): # Mocking the EC2 client @@ -632,7 +759,7 @@ def test_instance_not_found(self, mock_get_boto3_client): ) -class TestDescribeEC2Instance(unittest.TestCase): +class TestDescribeEC2Instance(TestCase): @patch( "challenges.aws_utils.get_boto3_client" ) # Mock the `get_boto3_client` function @@ -838,7 +965,7 @@ def test_multiple_instances(self, mock_get_boto3_client): ) -class TestStartEC2Instance(unittest.TestCase): +class TestStartEC2Instance(TestCase): @patch( "challenges.aws_utils.get_boto3_client" ) # Mock the `get_boto3_client` function @@ -994,7 +1121,7 @@ def test_start_ec2_instance_exception( mock_logger.exception.assert_called_once() -class TestRestartEC2Instance(unittest.TestCase): +class TestRestartEC2Instance(TestCase): @patch("challenges.aws_utils.get_boto3_client") @patch("challenges.aws_utils.logger") def test_restart_ec2_instance_success( @@ -1060,7 +1187,7 @@ def test_restart_ec2_instance_client_error( ) -class TestTerminateEC2Instance(unittest.TestCase): +class TestTerminateEC2Instance(TestCase): @patch("challenges.aws_utils.get_boto3_client") @patch("challenges.aws_utils.logger") def 
test_terminate_ec2_instance_success( @@ -1134,7 +1261,7 @@ def test_terminate_ec2_instance_client_error( challenge.save.assert_not_called() -class TestCreateEC2Instance(unittest.TestCase): +class TestCreateEC2Instance(TestCase): @patch("challenges.aws_utils.get_boto3_client") def test_existing_ec2_instance_id(self, mock_get_boto3_client): # Mock challenge object with existing EC2 instance ID @@ -1251,7 +1378,7 @@ def test_create_ec2_instance_client_error( self.assertEqual(str(logged_exception), str(client_error)) -class TestUpdateSQSRetentionPeriod(unittest.TestCase): +class TestUpdateSQSRetentionPeriod(TestCase): @patch("challenges.aws_utils.get_boto3_client") @patch("challenges.aws_utils.logger") def test_update_sqs_retention_period_success( @@ -1331,7 +1458,7 @@ def test_update_sqs_retention_period_failure( mock_logger.exception.assert_called_once() -class TestStartWorkers(unittest.TestCase): +class TestStartWorkers(TestCase): @patch("challenges.aws_utils.get_boto3_client") @patch("challenges.aws_utils.service_manager") @patch("challenges.aws_utils.settings", DEBUG=True) @@ -1614,7 +1741,7 @@ def test_stop_workers_no_active_workers( mock_service_manager.assert_not_called() -class TestScaleWorkers(unittest.TestCase): +class TestScaleWorkers(TestCase): @patch("challenges.aws_utils.settings", DEBUG=True) def test_scale_workers_debug_mode(self, mock_settings): # Mock queryset with challenges @@ -1790,7 +1917,7 @@ def test_scale_workers_failure( self.assertEqual(mock_service_manager.call_count, 2) -class TestScaleResources(unittest.TestCase): +class TestScaleResources(TestCase): @patch("challenges.aws_utils.settings", DEBUG=False) @patch("challenges.aws_utils.get_boto3_client") def test_scale_resources_no_changes( @@ -1857,22 +1984,21 @@ def test_scale_resources_deregister_success( challenge.task_def_arn = "some_task_def_arn" challenge.worker_cpu_cores = 2 challenge.worker_memory = 4096 + challenge.worker_image_url = "some_image_url" + challenge.queue = "queue_name" + challenge.ephemeral_storage = 50 + challenge.pk = 123 + challenge.workers = 10 # Mock other dependencies with patch( "challenges.utils.get_aws_credentials_for_challenge" ) as mock_get_aws_credentials_for_challenge, patch( - "challenges.aws_utils.task_definition", new_callable=MagicMock - ) as mock_task_definition, patch( - "challenges.aws_utils.eval" - ) as mock_eval: + "challenges.aws_utils.task_definition" + ) as mock_task_definition: mock_get_aws_credentials_for_challenge.return_value = {} - mock_task_definition.return_value = { - "some_key": "some_value" - } # Use a dictionary here - mock_eval.return_value = { - "some_key": "some_value" - } # Use a dictionary here + # Mock task_definition as a string template that returns valid JSON when formatted + mock_task_definition.format.return_value = '{"family": "worker_queue_name", "containerDefinitions": [{"name": "worker_queue_name"}]}' # Mock register_task_definition response mock_client.register_task_definition.return_value = { @@ -1954,18 +2080,15 @@ def test_scale_resources_register_task_def_success( with patch( "challenges.utils.get_aws_credentials_for_challenge" ) as mock_get_aws_credentials_for_challenge, patch( - "challenges.aws_utils.task_definition", new_callable=MagicMock + "challenges.aws_utils.task_definition" ) as mock_task_definition, patch( - "challenges.aws_utils.eval" - ) as mock_eval: + "challenges.aws_utils.update_service_args" + ) as mock_update_service_args: mock_get_aws_credentials_for_challenge.return_value = {} - mock_task_definition.return_value = { - 
"some_key": "some_value" - } # Use a dictionary here - mock_eval.return_value = { - "some_key": "some_value" - } # Use a dictionary here + # Mock task_definition as a string template that returns valid JSON when formatted + mock_task_definition.format.return_value = '{"family": "worker_queue_name", "containerDefinitions": [{"name": "worker_queue_name"}]}' + mock_update_service_args.format.return_value = '{"cluster": "evalai-prod-cluster", "service": "queue_name_service"}' # Mock register_task_definition response mock_client.register_task_definition.return_value = { @@ -2016,36 +2139,31 @@ def test_scale_resources_register_task_def_failure( with patch( "challenges.utils.get_aws_credentials_for_challenge" ) as mock_get_aws_credentials_for_challenge, patch( - "challenges.aws_utils.task_definition", new_callable=MagicMock - ) as mock_task_definition, patch( - "challenges.aws_utils.eval" - ) as mock_eval: + "challenges.aws_utils.task_definition" + ) as mock_task_definition: mock_get_aws_credentials_for_challenge.return_value = {} - mock_task_definition.return_value = { - "some_key": "some_value" - } # Use a dictionary here - mock_eval.return_value = { - "some_key": "some_value" - } # Use a dictionary here - - # Mock register_task_definition response with error - mock_client.register_task_definition.return_value = { - "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST}, - "Error": "Failed to register task definition", - } + # Mock task_definition as a string template that returns valid JSON when formatted + mock_task_definition.format.return_value = '{"family": "worker_queue_name", "containerDefinitions": [{"name": "worker_queue_name"}]}' - # Call the function - result = scale_resources( - challenge, worker_cpu_cores=4, worker_memory=8192 + # Mock register_task_definition to raise ClientError + mock_client.register_task_definition.side_effect = ClientError( + {"Error": {"Message": "Failed to register task definition"}}, + "RegisterTaskDefinition", ) - # Expected result - expected_result = { - "Error": "Failed to register task definition", - "ResponseMetadata": {"HTTPStatusCode": HTTPStatus.BAD_REQUEST}, - } - self.assertEqual(result, expected_result) + # Call the function + response = scale_resources(challenge, 4, 8192) + + # Verify the response + self.assertEqual( + response["Error"]["Message"], + "Failed to register task definition", + ) + mock_client.register_task_definition.assert_called_once() + mock_client.deregister_task_definition.assert_called_once_with( + taskDefinition="some_task_def_arn" + ) class TestDeleteWorkers(TestCase): @@ -2607,7 +2725,7 @@ def test_delete_log_group_with_exception( ) -class TestCreateEKSNodegroup(unittest.TestCase): +class TestCreateEKSNodegroup(TestCase): @patch("challenges.aws_utils.get_boto3_client") @patch("challenges.aws_utils.get_code_upload_setup_meta_for_challenge") @patch("challenges.utils.get_aws_credentials_for_challenge") @@ -2967,7 +3085,9 @@ def test_setup_eks_cluster_subnets_creation( @pytest.mark.django_db -class TestSetupEC2(TestCase): +class TestSetupEC2( + django.test.TestCase +): # Uses Django TestCase for database operations (User, Challenge models) def setUp(self): self.user = User.objects.create( username="someuser", @@ -3042,3 +3162,1684 @@ def test_update_sqs_retention_period_task( mock_update_sqs_retention_period.assert_called_once_with( mock_challenge_obj ) + + +# ===================== RETENTION TESTS ===================== + + +class TestRetentionCalculations(TestCase): + """Simplified tests for retention period 
calculations""" + + def test_retention_period_calculation(self): + """Test basic retention period calculations""" + from challenges.aws_utils import calculate_retention_period_days + + now = timezone.now() + + # Future end date: 10 days from now should give indefinite retention (no consent) + future_end = now + timedelta(days=10) + self.assertEqual(calculate_retention_period_days(future_end), 3653) + + # Past end date: 5 days ago should give indefinite retention (no consent) + past_end = now - timedelta(days=5) + self.assertEqual(calculate_retention_period_days(past_end), 3653) + + # Very old end date should give indefinite retention (no consent) + old_end = now - timedelta(days=50) + self.assertEqual(calculate_retention_period_days(old_end), 3653) + + def test_aws_retention_mapping(self): + """Test mapping to valid AWS CloudWatch values""" + from challenges.aws_utils import map_retention_days_to_aws_values + + # Test exact matches + self.assertEqual(map_retention_days_to_aws_values(30), 30) + self.assertEqual(map_retention_days_to_aws_values(90), 90) + + # Test rounding up to next valid value + self.assertEqual(map_retention_days_to_aws_values(25), 30) + self.assertEqual(map_retention_days_to_aws_values(100), 120) + + # Test edge cases + self.assertEqual(map_retention_days_to_aws_values(0), 1) + self.assertEqual(map_retention_days_to_aws_values(5000), 3653) + + def test_retention_period_with_consent_and_without_consent(self): + from types import SimpleNamespace + + from challenges.aws_utils import calculate_retention_period_days + + now = timezone.now() + end_date = now + timedelta(days=5) + # Challenge with consent + challenge_with_consent = SimpleNamespace( + retention_policy_consent=True, log_retention_days_override=None + ) + self.assertEqual( + calculate_retention_period_days(end_date, challenge_with_consent), + 30, + ) + # Challenge without consent + challenge_without_consent = SimpleNamespace( + retention_policy_consent=False, log_retention_days_override=None + ) + self.assertEqual( + calculate_retention_period_days( + end_date, challenge_without_consent + ), + 3653, + ) + + +def test_set_cloudwatch_log_retention_requires_consent(): + with patch( + "challenges.models.Challenge.objects.get" + ) as mock_challenge, patch( + "challenges.models.ChallengePhase.objects.filter" + ) as mock_phases: + mock_challenge.return_value.retention_policy_consent = False + mock_phases.return_value.exists.return_value = True + mock_phase = MagicMock() + mock_phase.end_date = timezone.now() + timedelta(days=10) + mock_phases.return_value.__iter__.return_value = iter([mock_phase]) + result = set_cloudwatch_log_retention(123) + assert result["requires_consent"] is True + assert "host has not consented" in result["error"] + + +@pytest.mark.django_db +class TestCloudWatchRetention( + django.test.TestCase +): # Uses Django TestCase for database operations (Challenge, ChallengePhase models) + + @patch("challenges.aws_utils.get_boto3_client") + @patch("challenges.utils.get_aws_credentials_for_challenge") + @patch("challenges.aws_utils.get_log_group_name") + def test_set_log_retention_success( + self, mock_log_group, mock_creds, mock_client + ): + """Test successful log retention setting""" + # Setup mocks + mock_log_group.return_value = "test-log-group" + mock_creds.return_value = {"aws_access_key_id": "test"} + mock_logs_client = MagicMock() + mock_client.return_value = mock_logs_client + + # Mock challenge with phase + with patch( + "challenges.models.Challenge.objects.get" + ) as mock_challenge: + with 
+
+
+@pytest.mark.django_db
+class TestCloudWatchRetention(
+    django.test.TestCase
+):  # Uses Django TestCase for database operations (Challenge, ChallengePhase models)
+
+    @patch("challenges.aws_utils.get_boto3_client")
+    @patch("challenges.utils.get_aws_credentials_for_challenge")
+    @patch("challenges.aws_utils.get_log_group_name")
+    def test_set_log_retention_success(
+        self, mock_log_group, mock_creds, mock_client
+    ):
+        """Test successful log retention setting"""
+        # Setup mocks
+        mock_log_group.return_value = "test-log-group"
+        mock_creds.return_value = {"aws_access_key_id": "test"}
+        mock_logs_client = MagicMock()
+        mock_client.return_value = mock_logs_client
+
+        # Mock challenge with phase
+        with patch(
+            "challenges.models.Challenge.objects.get"
+        ) as mock_challenge:
+            with patch(
+                "challenges.models.ChallengePhase.objects.filter"
+            ) as mock_phases:
+                mock_challenge.return_value.log_retention_days_override = None
+                mock_phase = MagicMock()
+                mock_phase.end_date = timezone.now() + timedelta(days=10)
+                # Properly mock the queryset
+                mock_phases_qs = MagicMock()
+                mock_phases_qs.exists.return_value = True
+                mock_phases_qs.__iter__.return_value = iter([mock_phase])
+                mock_phases.return_value = mock_phases_qs
+
+                result = set_cloudwatch_log_retention(123, retention_days=30)
+
+                self.assertTrue(result["success"])
+                self.assertEqual(result["retention_days"], 30)
+                mock_logs_client.put_retention_policy.assert_called_once()
+
+    def test_log_retention_no_phases(self):
+        """Test error when no phases exist"""
+        with patch("challenges.models.Challenge.objects.get"):
+            with patch(
+                "challenges.models.ChallengePhase.objects.filter"
+            ) as mock_phases:
+                mock_phases.return_value.exists.return_value = False
+
+                result = set_cloudwatch_log_retention(123)
+                self.assertIn("error", result)
+                self.assertIn("No phases found", result["error"])
+
+    @patch("challenges.aws_utils.get_boto3_client")
+    @patch("challenges.utils.get_aws_credentials_for_challenge")
+    @patch("challenges.aws_utils.get_log_group_name")
+    @patch("challenges.aws_utils.logger")
+    def test_set_log_retention_resource_not_found(
+        self, mock_logger, mock_log_group, mock_creds, mock_client
+    ):
+        """Test AWS ResourceNotFoundException is handled"""
+        mock_log_group.return_value = "test-log-group"
+        mock_creds.return_value = {"aws_access_key_id": "test"}
+        mock_logs_client = MagicMock()
+        # Simulate AWS ResourceNotFoundException
+        error_response = {
+            "Error": {
+                "Code": "ResourceNotFoundException",
+                "Message": "Log group not found",
+            }
+        }
+        client_error = ClientError(error_response, "PutRetentionPolicy")
+        mock_logs_client.put_retention_policy.side_effect = client_error
+        mock_client.return_value = mock_logs_client
+        with patch(
+            "challenges.models.Challenge.objects.get"
+        ) as mock_challenge, patch(
+            "challenges.models.ChallengePhase.objects.filter"
+        ) as mock_phases:
+            mock_challenge.return_value.log_retention_days_override = None
+            mock_phase = MagicMock()
+            mock_phase.end_date = timezone.now() + timedelta(days=10)
+            mock_phases_qs = MagicMock()
+            mock_phases_qs.exists.return_value = True
+            mock_phases_qs.__iter__.return_value = iter([mock_phase])
+            mock_phases.return_value = mock_phases_qs
+            result = set_cloudwatch_log_retention(123, retention_days=30)
+            self.assertIn("error", result)
+            self.assertIn("Log group not found", result["error"])
+
+    @patch("challenges.aws_utils.get_boto3_client")
+    @patch("challenges.utils.get_aws_credentials_for_challenge")
+    @patch("challenges.aws_utils.get_log_group_name")
+    @patch("challenges.aws_utils.logger")
+    def test_set_log_retention_unexpected_exception(
+        self, mock_logger, mock_log_group, mock_creds, mock_client
+    ):
+        """Test unexpected exception is handled"""
+        from challenges.aws_utils import set_cloudwatch_log_retention
+
+        mock_log_group.return_value = "test-log-group"
+        mock_creds.return_value = {"aws_access_key_id": "test"}
+        mock_logs_client = MagicMock()
+        # Simulate generic Exception
+        mock_logs_client.put_retention_policy.side_effect = Exception(
+            "Some error"
+        )
+        mock_client.return_value = mock_logs_client
+        with patch(
+            "challenges.models.Challenge.objects.get"
+        ) as mock_challenge, patch(
+            "challenges.models.ChallengePhase.objects.filter"
+        ) as mock_phases:
+            mock_challenge.return_value.log_retention_days_override = None
+            mock_phase =
MagicMock() + mock_phase.end_date = timezone.now() + timedelta(days=10) + mock_phases_qs = MagicMock() + mock_phases_qs.exists.return_value = True + mock_phases_qs.__iter__.return_value = iter([mock_phase]) + mock_phases.return_value = mock_phases_qs + result = set_cloudwatch_log_retention(123, retention_days=30) + self.assertIn("error", result) + self.assertIn("Some error", result["error"]) + mock_logger.exception.assert_called() + + @patch("challenges.aws_utils.get_boto3_client") + @patch("challenges.utils.get_aws_credentials_for_challenge") + @patch("challenges.aws_utils.get_log_group_name") + def test_set_log_retention_model_override( + self, mock_log_group, mock_creds, mock_client + ): + """Test model override for retention days is used""" + from challenges.aws_utils import set_cloudwatch_log_retention + + mock_log_group.return_value = "test-log-group" + mock_creds.return_value = {"aws_access_key_id": "test"} + mock_logs_client = MagicMock() + mock_client.return_value = mock_logs_client + with patch( + "challenges.models.Challenge.objects.get" + ) as mock_challenge, patch( + "challenges.models.ChallengePhase.objects.filter" + ) as mock_phases: + mock_challenge.return_value.log_retention_days_override = 90 + mock_phase = MagicMock() + mock_phase.end_date = timezone.now() + timedelta(days=10) + mock_phases_qs = MagicMock() + mock_phases_qs.exists.return_value = True + mock_phases_qs.__iter__.return_value = iter([mock_phase]) + mock_phases.return_value = mock_phases_qs + result = set_cloudwatch_log_retention(123) + self.assertTrue(result["success"]) + self.assertEqual(result["retention_days"], 90) + mock_logs_client.put_retention_policy.assert_called_once() + + @patch("challenges.aws_utils.get_boto3_client") + @patch("challenges.utils.get_aws_credentials_for_challenge") + @patch("challenges.aws_utils.get_log_group_name") + def test_set_log_retention_calculated_days( + self, mock_log_group, mock_creds, mock_client + ): + """Test calculated retention days is used when no override or CLI arg""" + from challenges.aws_utils import ( + calculate_retention_period_days, + map_retention_days_to_aws_values, + set_cloudwatch_log_retention, + ) + + mock_log_group.return_value = "test-log-group" + mock_creds.return_value = {"aws_access_key_id": "test"} + mock_logs_client = MagicMock() + mock_client.return_value = mock_logs_client + with patch( + "challenges.models.Challenge.objects.get" + ) as mock_challenge, patch( + "challenges.models.ChallengePhase.objects.filter" + ) as mock_phases: + # Mock challenge with consent to get 30 days retention + mock_challenge_obj = MagicMock() + mock_challenge_obj.log_retention_days_override = None + mock_challenge_obj.retention_policy_consent = True + mock_challenge.return_value = mock_challenge_obj + + mock_phase = MagicMock() + mock_phase.end_date = timezone.now() + timedelta(days=5) + mock_phases_qs = MagicMock() + mock_phases_qs.exists.return_value = True + mock_phases_qs.__iter__.return_value = iter([mock_phase]) + mock_phases.return_value = mock_phases_qs + expected_days = calculate_retention_period_days( + mock_phase.end_date, mock_challenge_obj + ) + expected_aws_days = map_retention_days_to_aws_values(expected_days) + result = set_cloudwatch_log_retention(123) + self.assertTrue(result["success"]) + self.assertEqual(result["retention_days"], expected_aws_days) + mock_logs_client.put_retention_policy.assert_called_once() + + +class TestSubmissionRetention(TestCase): + """Simplified submission retention tests""" + + 
@patch("challenges.aws_utils.calculate_retention_period_days") + def test_submission_retention_date_calculation( + self, mock_calculate_retention + ): + """Test submission retention date calculation""" + from challenges.aws_utils import calculate_submission_retention_date + + # Mock challenge phase with proper challenge object + mock_phase = MagicMock() + mock_challenge = MagicMock() + mock_challenge.retention_policy_consent = True + mock_phase.challenge = mock_challenge + + # Test private phase with end date + mock_phase.end_date = timezone.now() - timedelta(days=5) + mock_phase.is_public = False + + # Mock the retention calculation to return 30 days + mock_calculate_retention.return_value = 30 + + expected_date = mock_phase.end_date + timedelta(days=30) + result = calculate_submission_retention_date(mock_phase) + self.assertEqual(result, expected_date) + + # Test public phase (should return None) + mock_phase.is_public = True + result = calculate_submission_retention_date(mock_phase) + self.assertIsNone(result) + + # Test phase without end date + mock_phase.end_date = None + mock_phase.is_public = False + result = calculate_submission_retention_date(mock_phase) + self.assertIsNone(result) + + @patch("jobs.models.Submission.objects.filter") + def test_cleanup_no_submissions(self, mock_filter): + """Test cleanup when no submissions are eligible""" + from challenges.aws_utils import cleanup_expired_submission_artifacts + + mock_queryset = MagicMock() + mock_queryset.exists.return_value = False + mock_filter.return_value = mock_queryset + + result = cleanup_expired_submission_artifacts() + + self.assertEqual(result["total_processed"], 0) + self.assertEqual(result["successful_deletions"], 0) + self.assertEqual(result["failed_deletions"], 0) + + +class TestUtilityFunctions(TestCase): + """Test utility functions""" + + def test_log_group_name_generation(self): + """Test log group name format""" + from challenges.aws_utils import get_log_group_name + + import apps.challenges.aws_utils as aws_utils + + with patch.object(aws_utils.settings, "ENVIRONMENT", "test"): + result = get_log_group_name(123) + expected = "challenge-pk-123-test-workers" + self.assertEqual(result, expected) + + @patch("challenges.aws_utils.set_cloudwatch_log_retention") + def test_retention_callback_functions(self, mock_set_retention): + """Test retention callback functions""" + from challenges.aws_utils import ( + update_challenge_log_retention_on_approval, + ) + + import apps.challenges.aws_utils as aws_utils + + mock_challenge = MagicMock() + mock_challenge.pk = 123 + + # Test production mode + with patch.object(aws_utils.settings, "DEBUG", False): + mock_set_retention.return_value = {"success": True} + update_challenge_log_retention_on_approval(mock_challenge) + mock_set_retention.assert_called_with(123) + + # Test debug mode (should not call) + mock_set_retention.reset_mock() + with patch.object(aws_utils.settings, "DEBUG", True): + update_challenge_log_retention_on_approval(mock_challenge) + mock_set_retention.assert_not_called() + + +class TestEmailFunctions(TestCase): + """Test email utility functions""" + + def setUp(self): + self.mock_challenge = MagicMock() + self.mock_challenge.title = "Test Challenge" + self.mock_challenge.id = 123 + self.mock_challenge.image = None + + @patch("challenges.aws_utils.EmailMultiAlternatives") + @patch("challenges.aws_utils.render_to_string") + @patch("challenges.aws_utils.settings") + def test_send_template_email_success( + self, mock_settings, mock_render, mock_email_class + ): + 
"""Test successful template email sending""" + from challenges.aws_utils import send_template_email + + # Setup mocks + mock_settings.CLOUDCV_TEAM_EMAIL = "team@eval.ai" + mock_render.return_value = "Test email" + mock_email_instance = MagicMock() + mock_email_class.return_value = mock_email_instance + + # Call the function + result = send_template_email( + recipient_email="test@example.com", + subject="Test Subject", + template_name="test_template.html", + template_context={"key": "value"}, + ) + + # Assertions + self.assertTrue(result) + mock_email_class.assert_called_once() + mock_email_instance.attach_alternative.assert_called_once() + mock_email_instance.send.assert_called_once() + + @patch("challenges.aws_utils.EmailMultiAlternatives") + @patch("challenges.aws_utils.render_to_string") + @patch("challenges.aws_utils.settings") + def test_send_template_email_failure( + self, mock_settings, mock_render, mock_email_class + ): + """Test template email sending failure""" + from challenges.aws_utils import send_template_email + + # Setup mocks to raise exception + mock_settings.CLOUDCV_TEAM_EMAIL = "team@eval.ai" + mock_render.side_effect = Exception("Template error") + + # Call the function + result = send_template_email( + recipient_email="test@example.com", + subject="Test Subject", + template_name="test_template.html", + template_context={"key": "value"}, + ) + + # Assertions + self.assertFalse(result) + + @patch("challenges.aws_utils.send_template_email") + @patch("challenges.aws_utils.settings") + def test_send_retention_warning_email( + self, mock_settings, mock_send_template + ): + """Test retention warning email sending""" + from challenges.aws_utils import send_retention_warning_email + + # Setup + mock_settings.EVALAI_API_SERVER = "http://localhost:8000" + mock_settings.CLOUDCV_TEAM_EMAIL = "team@eval.ai" + mock_send_template.return_value = True + + warning_date = timezone.now() + timedelta(days=14) + submission_count = 5 + + # Call the function + result = send_retention_warning_email( + challenge=self.mock_challenge, + recipient_email="host@example.com", + submission_count=submission_count, + warning_date=warning_date, + ) + + # Assertions + self.assertTrue(result) + mock_send_template.assert_called_once() + + # Check the call arguments + call_args = mock_send_template.call_args + self.assertEqual(call_args[1]["recipient_email"], "host@example.com") + self.assertEqual( + call_args[1]["template_name"], "challenges/retention_warning.html" + ) + self.assertIn("CHALLENGE_NAME", call_args[1]["template_context"]) + self.assertEqual( + call_args[1]["template_context"]["CHALLENGE_NAME"], + "Test Challenge", + ) + + @patch("challenges.aws_utils.send_template_email") + @patch("challenges.aws_utils.settings") + def test_send_retention_warning_email_with_image( + self, mock_settings, mock_send_template + ): + """Test retention warning email with challenge image""" + from challenges.aws_utils import send_retention_warning_email + + # Setup challenge with image + mock_image = MagicMock() + mock_image.url = "http://example.com/image.jpg" + self.mock_challenge.image = mock_image + + mock_settings.EVALAI_API_SERVER = "http://localhost:8000" + mock_settings.CLOUDCV_TEAM_EMAIL = "team@eval.ai" + mock_send_template.return_value = True + + warning_date = timezone.now() + timedelta(days=14) + submission_count = 3 + + # Call the function + result = send_retention_warning_email( + challenge=self.mock_challenge, + recipient_email="host@example.com", + submission_count=submission_count, + 
+
+
+class TestCleanupExpiredSubmissionArtifacts(
+    django.test.TestCase
+):  # Uses Django TestCase for database operations (User, Challenge, ChallengePhase, Submission models)
+    def setUp(self):
+        self.user = User.objects.create_user(
+            username="testuser", email="test@test.com", password="testpass"
+        )
+        self.challenge_host_team = ChallengeHostTeam.objects.create(
+            team_name="Test Host Team", created_by=self.user
+        )
+        self.challenge = Challenge.objects.create(
+            title="Test Challenge",
+            description="Test Description",
+            creator=self.challenge_host_team,
+            start_date=timezone.now() - timedelta(days=10),
+            end_date=timezone.now() + timedelta(days=5),
+            retention_policy_consent=True,  # Enable retention for testing
+        )
+        self.challenge_phase = ChallengePhase.objects.create(
+            name="Test Phase",
+            description="Test Phase Description",
+            challenge=self.challenge,
+            start_date=timezone.now() - timedelta(days=10),
+            end_date=timezone.now() - timedelta(days=1),
+            is_public=False,
+        )
+
+    @patch("challenges.aws_utils.delete_submission_files_from_storage")
+    def test_cleanup_expired_submission_artifacts_success(
+        self, mock_delete_files
+    ):
+        from challenges.aws_utils import cleanup_expired_submission_artifacts
+        from jobs.models import Submission
+
+        submission = Submission.objects.create(
+            participant_team=ParticipantTeam.objects.create(
+                team_name="Test Team", created_by=self.user
+            ),
+            challenge_phase=self.challenge_phase,
+            created_by=self.user,
+            status="finished",
+            retention_eligible_date=timezone.now() - timedelta(days=1),
+            is_artifact_deleted=False,
+        )
+
+        # Mock the function to also update the submission
+        def mock_delete_side_effect(sub):
+            sub.is_artifact_deleted = True
+            sub.save(update_fields=["is_artifact_deleted"])
+            return {
+                "success": True,
+                "deleted_files": ["file1.txt"],
+                "failed_files": [],
+                "submission_id": sub.pk,
+            }
+
+        mock_delete_files.side_effect = mock_delete_side_effect
+        result = cleanup_expired_submission_artifacts()
+        self.assertEqual(result["total_processed"], 1)
+        self.assertEqual(result["successful_deletions"], 1)
+        submission.refresh_from_db()
+        self.assertTrue(submission.is_artifact_deleted)
+
+    @patch("challenges.aws_utils.delete_submission_files_from_storage")
+    def test_cleanup_expired_submission_artifacts_failure(
+        self, mock_delete_files
+    ):
+        from challenges.aws_utils import cleanup_expired_submission_artifacts
+        from jobs.models import Submission
+
+        submission = Submission.objects.create(
+            participant_team=ParticipantTeam.objects.create(
+                team_name="Test Team", created_by=self.user
+            ),
+            challenge_phase=self.challenge_phase,
+            created_by=self.user,
+            status="finished",
+            retention_eligible_date=timezone.now() - timedelta(days=1),
+            is_artifact_deleted=False,
+        )
+        mock_delete_files.return_value = {
+            "success": False,
+            "error": "S3 deletion failed",
+            "submission_id": submission.pk,
+        }
+        result = cleanup_expired_submission_artifacts()
+        self.assertEqual(result["total_processed"], 1)
+        self.assertEqual(result["failed_deletions"], 1)
+        self.assertEqual(len(result["errors"]), 1)
+        mock_delete_files.assert_called_once_with(submission)
+
+    def test_cleanup_expired_submission_artifacts_no_eligible_submissions(
+        self,
+    ):
+        from challenges.aws_utils import
cleanup_expired_submission_artifacts + + result = cleanup_expired_submission_artifacts() + self.assertEqual(result["total_processed"], 0) + + +class TestWeeklyRetentionNotificationsAndConsentLog(django.test.TestCase): + """Test the weekly retention notifications and consent logging function.""" + + def setUp(self): + """Set up test data.""" + self.user = User.objects.create_user( + username="testuser", email="test@test.com", password="testpass" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Test Challenge", + description="Test Description", + creator=self.challenge_host_team, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() + timedelta(days=5), + inform_hosts=True, + ) + self.challenge_phase = ChallengePhase.objects.create( + name="Test Phase", + description="Test Phase Description", + challenge=self.challenge, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() - timedelta(days=1), + is_public=False, + ) + + @patch("challenges.aws_utils.send_retention_warning_email") + @patch("challenges.aws_utils.settings") + @patch("django.utils.timezone.now") + def test_weekly_retention_notifications_success( + self, mock_now, mock_settings, mock_send_email + ): + """Test successful retention warning notification.""" + from datetime import datetime, timedelta + + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + from django.utils import timezone + from jobs.models import Submission + + # Freeze time to a fixed datetime + fixed_now = datetime(2025, 7, 16, 12, 0, 0, tzinfo=timezone.utc) + mock_now.return_value = fixed_now + warning_date = fixed_now + timedelta(days=14) + + # Setup challenge with all required conditions + self.challenge.inform_hosts = True + self.challenge.save() + + # Mock settings + mock_settings.EVALAI_API_SERVER = "http://localhost" + + # Create submission with exact warning date + Submission.objects.create( + participant_team=ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ), + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=warning_date, + is_artifact_deleted=False, + ) + + # Mock email sending to succeed + mock_send_email.return_value = True + + # Patch the method on the class, not the instance + with patch.object( + ChallengeHostTeam, + "get_all_challenge_host_email", + return_value=["host@test.com"], + ): + # Call the function inside the patch context + result = weekly_retention_notifications_and_consent_log() + + # Verify the result + self.assertEqual(result["notifications_sent"], 1) + + # Verify email was sent with correct parameters + mock_send_email.assert_called_once_with( + challenge=self.challenge, + recipient_email="host@test.com", + submission_count=1, + warning_date=warning_date, + ) + + @patch("challenges.aws_utils.send_retention_warning_email") + @patch("challenges.aws_utils.settings") + @patch("django.utils.timezone.now") + def test_weekly_retention_notifications_no_submissions( + self, mock_now, mock_settings, mock_send_email + ): + """Test when no submissions require warnings.""" + from datetime import datetime + + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + from django.utils import timezone + + # Freeze time to a fixed datetime + fixed_now = datetime(2025, 7, 16, 12, 0, 0, tzinfo=timezone.utc) + 
mock_now.return_value = fixed_now + + # Mock settings + mock_settings.EVALAI_API_SERVER = "http://localhost" + + # Call the function (no submissions created) + result = weekly_retention_notifications_and_consent_log() + + # Verify no notifications were sent + self.assertEqual(result["notifications_sent"], 0) + mock_send_email.assert_not_called() + + @patch("challenges.aws_utils.send_retention_warning_email") + @patch("challenges.aws_utils.settings") + @patch("django.utils.timezone.now") + def test_weekly_retention_notifications_inform_hosts_false( + self, mock_now, mock_settings, mock_send_email + ): + """Test when challenge has inform_hosts=False.""" + from datetime import datetime, timedelta + + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + from django.utils import timezone + from jobs.models import Submission + + # Freeze time to a fixed datetime + fixed_now = datetime(2025, 7, 16, 12, 0, 0, tzinfo=timezone.utc) + mock_now.return_value = fixed_now + warning_date = fixed_now + timedelta(days=14) + + # Setup challenge with inform_hosts=False + self.challenge.inform_hosts = False + self.challenge.save() + + # Mock settings + mock_settings.EVALAI_API_SERVER = "http://localhost" + + # Create submission with exact warning date + Submission.objects.create( + participant_team=ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ), + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=warning_date, + is_artifact_deleted=False, + ) + + # Mock email sending to succeed + mock_send_email.return_value = True + + # Call the function + result = weekly_retention_notifications_and_consent_log() + + # Verify no notifications were sent due to inform_hosts=False + self.assertEqual(result["notifications_sent"], 0) + mock_send_email.assert_not_called() + + @patch("challenges.aws_utils.send_retention_warning_email") + @patch("challenges.aws_utils.settings") + @patch("django.utils.timezone.now") + def test_weekly_retention_notifications_no_api_server( + self, mock_now, mock_settings, mock_send_email + ): + """Test when EVALAI_API_SERVER is not set.""" + from datetime import datetime, timedelta + + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + from django.utils import timezone + from jobs.models import Submission + + # Freeze time to a fixed datetime + fixed_now = datetime(2025, 7, 16, 12, 0, 0, tzinfo=timezone.utc) + mock_now.return_value = fixed_now + warning_date = fixed_now + timedelta(days=14) + + # Setup challenge + self.challenge.inform_hosts = True + self.challenge.save() + + # Mock settings without EVALAI_API_SERVER + mock_settings.EVALAI_API_SERVER = None + + # Create submission with exact warning date + Submission.objects.create( + participant_team=ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ), + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=warning_date, + is_artifact_deleted=False, + ) + + # Mock email sending to succeed + mock_send_email.return_value = True + + # Call the function + result = weekly_retention_notifications_and_consent_log() + + # Verify no notifications were sent due to missing API server setting + self.assertEqual(result["notifications_sent"], 0) + mock_send_email.assert_not_called() + + @patch("challenges.aws_utils.settings") + def test_weekly_retention_notifications_with_consent_changes( + self, mock_settings + 
): + """Test consent change logging functionality.""" + from datetime import timedelta + + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + from django.utils import timezone + + # Setup consent change + self.challenge.retention_policy_consent = True + self.challenge.retention_policy_consent_date = ( + timezone.now() - timedelta(days=3) + ) + self.challenge.retention_policy_consent_by = self.user + self.challenge.save() + + # Mock settings as the notification part might still run + mock_settings.EVALAI_API_SERVER = "http://localhost" + + # Use assertLogs to capture logging from 'challenges.aws_utils' + with self.assertLogs("challenges.aws_utils", level="INFO") as cm: + result = weekly_retention_notifications_and_consent_log() + + # Verify the log output contains consent change information + log_output = "\n".join(cm.output) + self.assertIn( + "[RetentionConsent] 1 consent changes in the last week:", + log_output, + ) + self.assertIn("[RetentionConsent] ✅", log_output) + self.assertIn( + f"Challenge {self.challenge.pk}: {self.challenge.title[:50]}", + log_output, + ) + self.assertIn( + f"[RetentionConsent] Consent by: {self.user.username}", + log_output, + ) + + # Verify the original assertions are still valid + self.assertIn("notifications_sent", result) + self.assertEqual( + result["notifications_sent"], 0 + ) # No warnings, just consent logging + + @patch("challenges.aws_utils.send_retention_warning_email") + @patch("challenges.aws_utils.settings") + @patch("django.utils.timezone.now") + def test_weekly_retention_notifications_email_exception( + self, mock_now, mock_settings, mock_send_email + ): + """Test that the task handles exceptions during email sending.""" + from datetime import datetime, timedelta + + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + from django.utils import timezone + from jobs.models import Submission + + # Freeze time to a fixed datetime + fixed_now = datetime(2025, 7, 16, 12, 0, 0, tzinfo=timezone.utc) + mock_now.return_value = fixed_now + warning_date = fixed_now + timedelta(days=14) + + # Setup challenge with all required conditions + self.challenge.inform_hosts = True + self.challenge.save() + + # Mock settings + mock_settings.EVALAI_API_SERVER = "http://localhost" + + # Create submission with exact warning date + Submission.objects.create( + participant_team=ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ), + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=warning_date, + is_artifact_deleted=False, + ) + + # Mock the email function to raise an error + mock_send_email.side_effect = Exception("SMTP server is down") + + # Use the same patch.object fix + with patch.object( + ChallengeHostTeam, + "get_all_challenge_host_email", + return_value=["host@test.com"], + ): + with self.assertLogs("challenges.aws_utils", level="ERROR") as cm: + result = weekly_retention_notifications_and_consent_log() + + # Assert that no notifications were successfully sent + self.assertEqual(result["notifications_sent"], 0) + + # Assert that the error was logged + log_output = "\n".join(cm.output) + self.assertIn( + f"Failed to send retention warning email to host@test.com for challenge {self.challenge.pk}: SMTP server is down", + log_output, + ) + + +class TestRecordHostRetentionConsent( + django.test.TestCase +): # Uses Django TestCase for database operations (User, Challenge models) + def setUp(self): + 
self.user = User.objects.create_user( + username="testuser", email="test@test.com", password="testpass" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Test Challenge", + description="Test Description", + creator=self.challenge_host_team, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() + timedelta(days=5), + ) + + @patch("challenges.aws_utils.is_user_a_host_of_challenge") + def test_record_host_retention_consent_success(self, mock_is_host): + from challenges.aws_utils import record_host_retention_consent + + mock_is_host.return_value = True + result = record_host_retention_consent( + self.challenge.pk, self.user, "Test consent notes" + ) + self.assertTrue(result["success"]) + self.challenge.refresh_from_db() + self.assertTrue(self.challenge.retention_policy_consent) + self.assertEqual(self.challenge.retention_policy_consent_by, self.user) + + @patch("challenges.aws_utils.is_user_a_host_of_challenge") + def test_record_host_retention_consent_unauthorized(self, mock_is_host): + from challenges.aws_utils import record_host_retention_consent + + mock_is_host.return_value = False + result = record_host_retention_consent(self.challenge.pk, self.user) + self.assertIn("error", result) + self.assertIn("not authorized", result["error"]) + + def test_record_host_retention_consent_challenge_not_found(self): + from challenges.aws_utils import record_host_retention_consent + + result = record_host_retention_consent(99999, self.user) + self.assertIn("error", result) + self.assertIn("does not exist", result["error"]) + + +class TestIsUserAHostOfChallenge( + django.test.TestCase +): # Uses Django TestCase for database operations (User, Challenge, ChallengeHost models) + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@test.com", password="testpass" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Test Challenge", + description="Test Description", + creator=self.challenge_host_team, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() + timedelta(days=5), + ) + + def test_is_user_a_host_of_challenge_true(self): + from challenges.aws_utils import is_user_a_host_of_challenge + from hosts.models import ChallengeHost + + ChallengeHost.objects.create( + user=self.user, + team_name=self.challenge_host_team, + status=ChallengeHost.ACCEPTED, + ) + result = is_user_a_host_of_challenge(self.user, self.challenge.pk) + self.assertTrue(result) + + def test_is_user_a_host_of_challenge_false(self): + from challenges.aws_utils import is_user_a_host_of_challenge + + result = is_user_a_host_of_challenge(self.user, self.challenge.pk) + self.assertFalse(result) + + def test_is_user_a_host_of_challenge_challenge_not_found(self): + from challenges.aws_utils import is_user_a_host_of_challenge + + result = is_user_a_host_of_challenge(self.user, 99999) + self.assertFalse(result) + + +class TestUpdateChallengeLogRetentionFunctions( + django.test.TestCase +): # Uses Django TestCase for database operations (User, Challenge models) + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@test.com", password="testpass" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = 
Challenge.objects.create( + title="Test Challenge", + description="Test Description", + creator=self.challenge_host_team, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() + timedelta(days=5), + ) + + @patch("challenges.aws_utils.set_cloudwatch_log_retention") + @patch("challenges.aws_utils.settings") + def test_update_challenge_log_retention_on_approval( + self, mock_settings, mock_set_retention + ): + from challenges.aws_utils import ( + update_challenge_log_retention_on_approval, + ) + + mock_settings.DEBUG = False + mock_set_retention.return_value = {"success": True} + update_challenge_log_retention_on_approval(self.challenge) + mock_set_retention.assert_called_once_with(self.challenge.pk) + + @patch("challenges.aws_utils.set_cloudwatch_log_retention") + @patch("challenges.aws_utils.settings") + def test_update_challenge_log_retention_on_restart( + self, mock_settings, mock_set_retention + ): + from challenges.aws_utils import ( + update_challenge_log_retention_on_restart, + ) + + mock_settings.DEBUG = False + mock_set_retention.return_value = {"success": True} + update_challenge_log_retention_on_restart(self.challenge) + mock_set_retention.assert_called_once_with(self.challenge.pk) + + @patch("challenges.aws_utils.set_cloudwatch_log_retention") + @patch("challenges.aws_utils.settings") + def test_update_challenge_log_retention_on_task_def_registration( + self, mock_settings, mock_set_retention + ): + from challenges.aws_utils import ( + update_challenge_log_retention_on_task_def_registration, + ) + + mock_settings.DEBUG = False + mock_set_retention.return_value = {"success": True} + update_challenge_log_retention_on_task_def_registration(self.challenge) + mock_set_retention.assert_called_once_with(self.challenge.pk) + + @patch("challenges.aws_utils.settings") + def test_update_challenge_log_retention_debug_mode(self, mock_settings): + from challenges.aws_utils import ( + update_challenge_log_retention_on_approval, + update_challenge_log_retention_on_restart, + update_challenge_log_retention_on_task_def_registration, + ) + + mock_settings.DEBUG = True + update_challenge_log_retention_on_approval(self.challenge) + update_challenge_log_retention_on_restart(self.challenge) + update_challenge_log_retention_on_task_def_registration(self.challenge) + + +class TestDeleteSubmissionFilesFromStorage( + django.test.TestCase +): # Uses Django TestCase for database operations (User, Challenge, ChallengePhase, Submission models) + def setUp(self): + self.user = User.objects.create_user( + username="testuser", email="test@test.com", password="testpass" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Test Challenge", + description="Test Description", + creator=self.challenge_host_team, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() + timedelta(days=5), + ) + self.challenge_phase = ChallengePhase.objects.create( + name="Test Phase", + description="Test Phase Description", + challenge=self.challenge, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() - timedelta(days=1), + is_public=False, + ) + + @patch("challenges.aws_utils.get_boto3_client") + def test_delete_submission_files_from_storage_success( + self, mock_get_client + ): + from challenges.aws_utils import delete_submission_files_from_storage + from jobs.models import Submission + + submission = Submission.objects.create( + 
participant_team=ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ), + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + is_artifact_deleted=False, + ) + mock_s3_client = MagicMock() + mock_get_client.return_value = mock_s3_client + result = delete_submission_files_from_storage(submission) + self.assertTrue(result["success"]) + submission.refresh_from_db() + self.assertTrue(submission.is_artifact_deleted) + + @patch("challenges.aws_utils.get_boto3_client") + def test_delete_submission_files_from_storage_s3_error( + self, mock_get_client + ): + from botocore.exceptions import ClientError + from challenges.aws_utils import delete_submission_files_from_storage + from jobs.models import Submission + + submission = Submission.objects.create( + participant_team=ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ), + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + is_artifact_deleted=False, + ) + + # Mock a file field to trigger deletion attempt + submission.input_file = "test_file.txt" + submission.save() + + mock_s3_client = MagicMock() + mock_s3_client.delete_object.side_effect = ClientError( + {"Error": {"Code": "AccessDenied"}}, "DeleteObject" + ) + mock_get_client.return_value = mock_s3_client + result = delete_submission_files_from_storage(submission) + self.assertTrue(result["success"]) + self.assertGreater(len(result["failed_files"]), 0) + + +class TestCeleryTasksWithAWSMocking(django.test.TestCase): + """Test Celery tasks with comprehensive AWS mocking for production-like testing.""" + + def setUp(self): + """Set up test data for AWS-mocked Celery task testing.""" + self.user = User.objects.create_user( + username="testuser", email="test@test.com", password="testpass" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Test Challenge", + description="Test Description", + creator=self.challenge_host_team, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() + timedelta(days=5), + retention_policy_consent=True, + log_retention_days_override=30, + ) + self.challenge_phase = ChallengePhase.objects.create( + name="Test Phase", + description="Test Phase Description", + challenge=self.challenge, + start_date=timezone.now() - timedelta(days=10), + end_date=timezone.now() - timedelta(days=1), + is_public=False, + ) + self.participant_team = ParticipantTeam.objects.create( + team_name="Test Team", created_by=self.user + ) + + @patch("challenges.aws_utils.delete_submission_files_from_storage") + @patch("challenges.aws_utils.logger") + def test_cleanup_expired_submission_artifacts_with_aws_mocking( + self, mock_logger, mock_delete_files + ): + """Test cleanup task with full AWS S3 mocking.""" + from challenges.aws_utils import cleanup_expired_submission_artifacts + + # Create test submission + submission = Submission.objects.create( + participant_team=self.participant_team, + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=timezone.now() - timedelta(days=1), + is_artifact_deleted=False, + ) + + # Mock the delete function to return success and update submission + def mock_delete_side_effect(sub): + sub.is_artifact_deleted = True + sub.artifact_deletion_date = timezone.now() + sub.save( + update_fields=["is_artifact_deleted", "artifact_deletion_date"] + ) + return { 
+ "success": True, + "deleted_files": ["file1.txt"], + "failed_files": [], + "submission_id": sub.pk, + } + + mock_delete_files.side_effect = mock_delete_side_effect + + result = cleanup_expired_submission_artifacts() + + # Verify the delete function was called + mock_delete_files.assert_called_once_with(submission) + + # Verify results + self.assertEqual(result["total_processed"], 1) + self.assertEqual(result["successful_deletions"], 1) + self.assertEqual(result["failed_deletions"], 0) + + # Verify submission was updated + submission.refresh_from_db() + self.assertTrue(submission.is_artifact_deleted) + self.assertIsNotNone(submission.artifact_deletion_date) + + # Verify logging + mock_logger.info.assert_called() + + @patch("challenges.aws_utils.delete_submission_files_from_storage") + @patch("challenges.aws_utils.logger") + def test_cleanup_expired_submission_artifacts_s3_failure( + self, mock_logger, mock_delete_files + ): + """Test cleanup task when S3 operations fail.""" + from challenges.aws_utils import cleanup_expired_submission_artifacts + + # Create test submission + submission = Submission.objects.create( + participant_team=self.participant_team, + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=timezone.now() - timedelta(days=1), + is_artifact_deleted=False, + ) + + # Mock the delete function to return failure + mock_delete_files.return_value = { + "success": False, + "error": "S3 deletion failed", + "submission_id": submission.pk, + } + + result = cleanup_expired_submission_artifacts() + + # Verify the delete function was called + mock_delete_files.assert_called_once_with(submission) + + # Verify results show failure + self.assertEqual(result["total_processed"], 1) + self.assertEqual(result["successful_deletions"], 0) + self.assertEqual(result["failed_deletions"], 1) + self.assertEqual(len(result["errors"]), 1) + + # Verify error logging + mock_logger.info.assert_called() + + @patch("challenges.aws_utils.get_boto3_client") + @patch("challenges.utils.get_aws_credentials_for_challenge") + @patch("challenges.aws_utils.get_log_group_name") + @patch("challenges.aws_utils.logger") + def test_update_submission_retention_dates_with_aws_mocking( + self, + mock_logger, + mock_get_log_group, + mock_get_creds, + mock_get_boto3_client, + ): + """Test update submission retention dates task with AWS CloudWatch mocking.""" + from challenges.aws_utils import update_submission_retention_dates + + # Create test submission + submission = Submission.objects.create( + participant_team=self.participant_team, + challenge_phase=self.challenge_phase, + created_by=self.user, + status="finished", + retention_eligible_date=None, # Will be calculated + ) + + # Mock AWS credentials + mock_get_creds.return_value = { + "aws_access_key_id": "test_key", + "aws_secret_access_key": "test_secret", + "region_name": "us-east-1", + } + + # Mock log group name + mock_get_log_group.return_value = ( + f"/aws/ecs/challenge-{self.challenge.pk}" + ) + + # Mock CloudWatch client + mock_logs_client = Mock() + mock_get_boto3_client.return_value = mock_logs_client + + # Mock successful CloudWatch operation + mock_logs_client.put_retention_policy.return_value = {} + + update_submission_retention_dates() + + # Verify submission was updated + submission.refresh_from_db() + self.assertIsNotNone(submission.retention_eligible_date) + + # Verify logging + mock_logger.info.assert_called() + + @patch("challenges.aws_utils.send_retention_warning_email") + 
@patch("challenges.aws_utils.settings") + @patch("challenges.aws_utils.logger") + def test_weekly_retention_notifications_with_aws_mocking( + self, mock_logger, mock_settings, mock_send_email + ): + """Test weekly notifications task with AWS and email mocking.""" + from challenges.aws_utils import ( + weekly_retention_notifications_and_consent_log, + ) + + # Mock settings + mock_settings.EVALAI_API_SERVER = "https://test.eval.ai" + mock_settings.CLOUDCV_TEAM_EMAIL = "team@eval.ai" + mock_settings.INFORM_HOSTS_ABOUT_RETENTION = True + + # Mock email sending + mock_send_email.return_value = True + + # Mock the timezone.now() call inside the function + with patch("django.utils.timezone.now", return_value=timezone.now()): + result = weekly_retention_notifications_and_consent_log() + + # Verify result structure + self.assertIn("notifications_sent", result) + + # Verify logging + mock_logger.info.assert_called() + + @patch("challenges.aws_utils.get_boto3_client") + @patch("challenges.utils.get_aws_credentials_for_challenge") + @patch("challenges.aws_utils.get_log_group_name") + @patch("challenges.aws_utils.logger") + def test_setup_ec2_with_aws_mocking( + self, + mock_logger, + mock_get_log_group, + mock_get_creds, + mock_get_boto3_client, + ): + """Test EC2 setup task with comprehensive AWS mocking.""" + from challenges.aws_utils import setup_ec2 + + # Mock AWS credentials + mock_get_creds.return_value = { + "aws_access_key_id": "test_key", + "aws_secret_access_key": "test_secret", + "region_name": "us-east-1", + } + + # Mock log group name + mock_get_log_group.return_value = ( + f"/aws/ecs/challenge-{self.challenge.pk}" + ) + + # Mock EC2 client + mock_ec2_client = Mock() + mock_get_boto3_client.return_value = mock_ec2_client + + # Mock EC2 operations + mock_ec2_client.describe_instances.return_value = { + "Reservations": [ + { + "Instances": [ + { + "InstanceId": "i-1234567890abcdef0", + "State": {"Name": "stopped"}, + } + ] + } + ] + } + mock_ec2_client.start_instances.return_value = { + "StartingInstances": [ + { + "InstanceId": "i-1234567890abcdef0", + "CurrentState": {"Name": "starting"}, + } + ] + } + mock_ec2_client.run_instances.return_value = { + "Instances": [ + { + "InstanceId": "i-1234567890abcdef0", + "State": {"Name": "pending"}, + } + ] + } + + # Serialize challenge for task + serialized_challenge = serializers.serialize("json", [self.challenge]) + + setup_ec2(serialized_challenge) + + # Verify AWS interactions + mock_get_boto3_client.assert_called() + + @patch("challenges.aws_utils.get_boto3_client") + @patch("challenges.utils.get_aws_credentials_for_challenge") + @patch("challenges.aws_utils.logger") + def test_update_sqs_retention_period_task_with_aws_mocking( + self, mock_logger, mock_get_creds, mock_get_boto3_client + ): + """Test SQS retention period update task with AWS mocking.""" + from challenges.aws_utils import update_sqs_retention_period_task + + # Mock AWS credentials + mock_get_creds.return_value = { + "aws_access_key_id": "test_key", + "aws_secret_access_key": "test_secret", + "region_name": "us-east-1", + } + + # Mock SQS client + mock_sqs_client = Mock() + mock_get_boto3_client.return_value = mock_sqs_client + + # Mock SQS operations + mock_sqs_client.get_queue_attributes.return_value = { + "Attributes": {"MessageRetentionPeriod": "345600"} # 4 days + } + mock_sqs_client.set_queue_attributes.return_value = {} + + # Serialize challenge for task + serialized_challenge = serializers.serialize("json", [self.challenge]) + + 
+
+    @patch("challenges.aws_utils.get_boto3_client")
+    @patch("challenges.utils.get_aws_credentials_for_challenge")
+    @patch("challenges.aws_utils.logger")
+    def test_update_sqs_retention_period_task_with_aws_mocking(
+        self, mock_logger, mock_get_creds, mock_get_boto3_client
+    ):
+        """Test SQS retention period update task with AWS mocking."""
+        from challenges.aws_utils import update_sqs_retention_period_task
+
+        # Mock AWS credentials
+        mock_get_creds.return_value = {
+            "aws_access_key_id": "test_key",
+            "aws_secret_access_key": "test_secret",
+            "region_name": "us-east-1",
+        }
+
+        # Mock SQS client
+        mock_sqs_client = Mock()
+        mock_get_boto3_client.return_value = mock_sqs_client
+
+        # Mock SQS operations
+        mock_sqs_client.get_queue_attributes.return_value = {
+            "Attributes": {"MessageRetentionPeriod": "345600"}  # 4 days
+        }
+        mock_sqs_client.set_queue_attributes.return_value = {}
+
+        # Serialize challenge for task
+        serialized_challenge = serializers.serialize("json", [self.challenge])
+
+        update_sqs_retention_period_task(serialized_challenge)
+
+        # Verify AWS interactions
+        mock_get_boto3_client.assert_called()
+
+    @patch("challenges.aws_utils.get_boto3_client")
+    @patch("challenges.aws_utils.get_code_upload_setup_meta_for_challenge")
+    @patch("challenges.utils.get_aws_credentials_for_challenge")
+    @patch("challenges.aws_utils.settings")
+    @patch("challenges.aws_utils.logger")
+    def test_create_eks_nodegroup_with_aws_mocking(
+        self,
+        mock_logger,
+        mock_settings,
+        mock_get_creds,
+        mock_get_setup_meta,
+        mock_get_boto3_client,
+    ):
+        """Test EKS nodegroup creation task with comprehensive AWS mocking."""
+        from challenges.aws_utils import create_eks_nodegroup
+
+        # Mock AWS credentials
+        mock_get_creds.return_value = {
+            "aws_access_key_id": "test_key",
+            "aws_secret_access_key": "test_secret",
+            "region_name": "us-east-1",
+            "AWS_REGION": "us-east-1",
+            "AWS_ACCOUNT_ID": "123456789012",
+            "AWS_ACCESS_KEY_ID": "test_key",
+            "AWS_SECRET_ACCESS_KEY": "test_secret",
+            "AWS_STORAGE_BUCKET_NAME": "test-bucket",
+        }
+
+        # Mock setup metadata
+        mock_get_setup_meta.return_value = {
+            "SUBNET_1": "subnet-123",
+            "SUBNET_2": "subnet-456",
+            "EKS_NODEGROUP_ROLE_ARN": "arn:aws:iam::123456789012:role/test-nodegroup-role",
+        }
+
+        # Mock EKS client
+        mock_eks_client = Mock()
+        mock_get_boto3_client.return_value = mock_eks_client
+
+        # Mock settings
+        mock_settings.AWS_SES_REGION_NAME = "us-east-1"
+        mock_settings.AWS_SES_REGION_ENDPOINT = (
+            "https://email.us-east-1.amazonaws.com"
+        )
+        mock_settings.ENVIRONMENT = "test"
+
+        # Mock EKS operations
+        mock_eks_client.create_nodegroup.return_value = {
+            "nodegroup": {
+                "nodegroupName": "test-nodegroup",
+                "status": "CREATING",
+            }
+        }
+
+        # Mock the downstream service creation to avoid complex dependencies
+        with patch(
+            "challenges.aws_utils.create_service_by_challenge_pk"
+        ) as mock_create_service:
+            mock_create_service.return_value = {"success": True}
+
+            # Serialize challenge for task
+            serialized_challenge = serializers.serialize(
+                "json", [self.challenge]
+            )
+            create_eks_nodegroup(serialized_challenge, "test-cluster")
+
+        # Verify AWS interactions
+        mock_get_creds.assert_called_with(self.challenge.pk)
+        mock_get_setup_meta.assert_called_with(self.challenge.pk)
+        mock_get_boto3_client.assert_called()
+        mock_eks_client.create_nodegroup.assert_called()
+
+        # Verify logging
+        mock_logger.info.assert_called()
+
+    @patch("challenges.aws_utils.get_boto3_client")
+    @patch("challenges.utils.get_aws_credentials_for_challenge")
+    @patch("challenges.aws_utils.logger")
+    def test_setup_eks_cluster_with_aws_mocking(
+        self, mock_logger, mock_get_creds, mock_get_boto3_client
+    ):
+        """Test EKS cluster setup task with comprehensive AWS mocking."""
+        from challenges.aws_utils import setup_eks_cluster
+
+        # Mock AWS credentials
+        mock_get_creds.return_value = {
+            "aws_access_key_id": "test_key",
+            "aws_secret_access_key": "test_secret",
+            "region_name": "us-east-1",
+        }
+
+        # Mock IAM client
+        mock_iam_client = Mock()
+        mock_get_boto3_client.return_value = mock_iam_client
+
+        # Mock IAM operations
+        mock_iam_client.create_role.return_value = {
+            "Role": {
+                "RoleName": "test-role",
+                "Arn": "arn:aws:iam::123456789012:role/test-role",
+            }
+        }
+        mock_iam_client.attach_role_policy.return_value = {}
+        mock_iam_client.create_policy.return_value = {
+            "Policy": {
+                "PolicyName": "test-policy",
+                "Arn": "arn:aws:iam::123456789012:policy/test-policy",
+            }
+        }
+
+        # Serialize challenge for task
+        serialized_challenge = serializers.serialize("json", [self.challenge])
+
+        setup_eks_cluster(serialized_challenge)
+
+        # Verify AWS interactions
+        mock_get_creds.assert_called_with(self.challenge.pk)
+        mock_get_boto3_client.assert_called_with(
+            "iam", mock_get_creds.return_value
+        )
+        mock_iam_client.create_role.assert_called()
+        mock_iam_client.attach_role_policy.assert_called()
+        mock_iam_client.create_policy.assert_called()
+
+    @patch("challenges.aws_utils.delete_submission_files_from_storage")
+    def test_celery_task_error_handling(self, mock_delete_files):
+        """Test that Celery tasks handle errors gracefully."""
+        from challenges.aws_utils import cleanup_expired_submission_artifacts
+
+        # Create test submission
+        submission = Submission.objects.create(
+            participant_team=self.participant_team,
+            challenge_phase=self.challenge_phase,
+            created_by=self.user,
+            status="finished",
+            retention_eligible_date=timezone.now() - timedelta(days=1),
+            is_artifact_deleted=False,
+        )
+
+        # Mock the delete function to return failure
+        mock_delete_files.return_value = {
+            "success": False,
+            "error": "Service temporarily unavailable",
+            "submission_id": submission.pk,
+        }
+
+        # Task should not crash, should handle error gracefully
+        result = cleanup_expired_submission_artifacts()
+
+        # Verify error was handled
+        self.assertEqual(result["total_processed"], 1)
+        self.assertEqual(result["successful_deletions"], 0)
+        self.assertEqual(result["failed_deletions"], 1)
+        self.assertEqual(len(result["errors"]), 1)
+
+    @patch("challenges.aws_utils.delete_submission_files_from_storage")
+    def test_celery_task_aws_credentials_handling(self, mock_delete_files):
+        """Test that Celery tasks handle AWS credentials properly."""
+        from challenges.aws_utils import cleanup_expired_submission_artifacts
+
+        # Create test submission
+        submission = Submission.objects.create(
+            participant_team=self.participant_team,
+            challenge_phase=self.challenge_phase,
+            created_by=self.user,
+            status="finished",
+            retention_eligible_date=timezone.now() - timedelta(days=1),
+            is_artifact_deleted=False,
+        )
+
+        # Mock the delete function to return success
+        mock_delete_files.return_value = {
+            "success": True,
+            "deleted_files": ["file1.txt"],
+            "failed_files": [],
+            "submission_id": submission.pk,
+        }
+
+        # Task should use default AWS credentials when challenge-specific ones aren't available
+        result = cleanup_expired_submission_artifacts()
+
+        # Verify the delete function was called
+        mock_delete_files.assert_called_once_with(submission)
+
+        # Verify task completed successfully
+        self.assertEqual(result["total_processed"], 1)
+        self.assertEqual(result["successful_deletions"], 1)
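+
+    @patch("challenges.aws_utils.delete_submission_files_from_storage")
+    def test_cleanup_skips_already_deleted_artifacts(self, mock_delete_files):
+        # Editor's sketch (hedged, not part of the original changeset):
+        # assumes cleanup_expired_submission_artifacts filters on
+        # is_artifact_deleted=False, so an already-cleaned submission is
+        # never handed to the deleter and is not counted as processed.
+        from challenges.aws_utils import cleanup_expired_submission_artifacts
+
+        Submission.objects.create(
+            participant_team=self.participant_team,
+            challenge_phase=self.challenge_phase,
+            created_by=self.user,
+            status="finished",
+            retention_eligible_date=timezone.now() - timedelta(days=1),
+            is_artifact_deleted=True,
+        )
+
+        result = cleanup_expired_submission_artifacts()
+
+        mock_delete_files.assert_not_called()
+        self.assertEqual(result["total_processed"], 0)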
diff --git a/tests/unit/challenges/test_models.py b/tests/unit/challenges/test_models.py
index fef437059f..9f75174a02 100644
--- a/tests/unit/challenges/test_models.py
+++ b/tests/unit/challenges/test_models.py
@@ -153,6 +153,110 @@ def test_get_end_date(self):
             self.challenge.end_date, self.challenge.get_end_date()
         )
 
+    def test_retention_policy_consent_fields_default(self):
+        self.assertFalse(self.challenge.retention_policy_consent)
+        self.assertIsNone(self.challenge.retention_policy_consent_date)
+        self.assertIsNone(self.challenge.retention_policy_consent_by)
+        self.assertIsNone(self.challenge.retention_policy_notes)
+
+    def test_retention_policy_consent_fields_set(self):
+        now = timezone.now()
+        self.challenge.retention_policy_consent = True
+        self.challenge.retention_policy_consent_date = now
+        self.challenge.retention_policy_consent_by = self.user
+        self.challenge.retention_policy_notes = (
+            "Host consented for 30-day retention."
+        )
+        self.challenge.save()
+        self.challenge.refresh_from_db()
+        self.assertTrue(self.challenge.retention_policy_consent)
+        self.assertEqual(self.challenge.retention_policy_consent_date, now)
+        self.assertEqual(self.challenge.retention_policy_consent_by, self.user)
+        self.assertEqual(
+            self.challenge.retention_policy_notes,
+            "Host consented for 30-day retention.",
+        )
+
+    def test_retention_policy_consent_fields_partial_set(self):
+        """Test setting only some retention consent fields"""
+        self.challenge.retention_policy_consent = True
+        self.challenge.retention_policy_consent_date = timezone.now()
+        self.challenge.save()
+        self.challenge.refresh_from_db()
+
+        self.assertTrue(self.challenge.retention_policy_consent)
+        self.assertIsNotNone(self.challenge.retention_policy_consent_date)
+        self.assertIsNone(self.challenge.retention_policy_consent_by)
+        self.assertIsNone(self.challenge.retention_policy_notes)
+
+    def test_retention_policy_consent_withdrawal(self):
+        """Test withdrawing consent by setting consent to False"""
+        # First set consent
+        self.challenge.retention_policy_consent = True
+        self.challenge.retention_policy_consent_date = timezone.now()
+        self.challenge.retention_policy_consent_by = self.user
+        self.challenge.retention_policy_notes = "Initial consent"
+        self.challenge.save()
+
+        # Then withdraw consent
+        self.challenge.retention_policy_consent = False
+        self.challenge.save()
+        self.challenge.refresh_from_db()
+
+        self.assertFalse(self.challenge.retention_policy_consent)
+        # Other fields should remain unchanged
+        self.assertIsNotNone(self.challenge.retention_policy_consent_date)
+        self.assertEqual(self.challenge.retention_policy_consent_by, self.user)
+        self.assertEqual(
+            self.challenge.retention_policy_notes, "Initial consent"
+        )
+
+    def test_retention_policy_consent_notes_update(self):
+        """Test updating consent notes without changing other fields"""
+        # Set initial consent
+        initial_date = timezone.now()
+        self.challenge.retention_policy_consent = True
+        self.challenge.retention_policy_consent_date = initial_date
+        self.challenge.retention_policy_consent_by = self.user
+        self.challenge.retention_policy_notes = "Initial notes"
+        self.challenge.save()
+
+        # Update only notes
+        self.challenge.retention_policy_notes = "Updated notes"
+        self.challenge.save()
+        self.challenge.refresh_from_db()
+
+        self.assertTrue(self.challenge.retention_policy_consent)
+        self.assertEqual(
+            self.challenge.retention_policy_consent_date, initial_date
+        )
+        self.assertEqual(self.challenge.retention_policy_consent_by, self.user)
+        self.assertEqual(
+            self.challenge.retention_policy_notes, "Updated notes"
+        )
+
+    def test_retention_policy_consent_consent_by_change(self):
+        """Test changing who provided consent"""
+        other_user = User.objects.create_user(
+            username="otheruser", email="other@test.com", password="testpass"
+        )
+
+        # Set initial consent with one user
+        self.challenge.retention_policy_consent = True
+        self.challenge.retention_policy_consent_date = timezone.now()
+        self.challenge.retention_policy_consent_by = self.user
+        self.challenge.save()
+
+        # Change consent to another user
+        self.challenge.retention_policy_consent_by = other_user
+        self.challenge.save()
+        self.challenge.refresh_from_db()
+
+        self.assertTrue(self.challenge.retention_policy_consent)
+        self.assertEqual(
+            self.challenge.retention_policy_consent_by, other_user
+        )
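+
+    def test_retention_defaults_to_indefinite_without_consent(self):
+        # Editor's sketch (hedged, not part of the original changeset):
+        # assumes calculate_retention_period_days falls back to the AWS
+        # CloudWatch maximum of 3653 days when no consent is recorded.
+        from challenges.aws_utils import calculate_retention_period_days
+
+        self.assertFalse(self.challenge.retention_policy_consent)
+        self.assertEqual(
+            calculate_retention_period_days(
+                self.challenge.end_date, self.challenge
+            ),
+            3653,
+        )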
+
 
 class DatasetSplitTestCase(BaseTestCase):
     def setUp(self):
diff --git a/tests/unit/challenges/test_views.py b/tests/unit/challenges/test_views.py
index ba543c7ae2..fc639567c7 100644
--- a/tests/unit/challenges/test_views.py
+++ b/tests/unit/challenges/test_views.py
@@ -6556,17 +6556,46 @@ def setUp(self):
         settings.AWS_SES_REGION_ENDPOINT = "email.us-east-1.amazonaws.com"
         return super().setUp()
 
-    def test_update_challenge_approval_when_challenge_exists(self):
-        self.user.is_staff = True
-        self.user.save()
-        self.url = reverse_lazy("challenges:update_challenge_approval")
-        expected = {"message": "Challenge updated successfully!"}
-        response = self.client.post(
-            self.url,
-            {"challenge_pk": self.challenge.pk, "approved_by_admin": True},
-        )
-        self.assertEqual(response.data, expected)
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
+    @mock.patch("challenges.aws_utils.set_cloudwatch_log_retention")
+    def test_update_challenge_approval_when_challenge_exists(
+        self, mock_set_log_retention
+    ):
+        from challenges.models import Challenge
+        from django.db.models.signals import post_save
+
+        # Temporarily disconnect post_save signals to prevent side effects
+        post_save_receivers = []
+        for receiver in post_save._live_receivers(sender=Challenge):
+            post_save_receivers.append(receiver)
+            post_save.disconnect(receiver, sender=Challenge)
+
+        try:
+            self.user.is_staff = True
+            self.user.save()
+
+            # Mock the log retention function to return success
+            mock_set_log_retention.return_value = {
+                "success": True,
+                "retention_days": 30,
+                "message": "Retention policy set successfully",
+            }
+
+            self.url = reverse_lazy("challenges:update_challenge_approval")
+            expected = {"message": "Challenge updated successfully!"}
+            response = self.client.post(
+                self.url,
+                {"challenge_pk": self.challenge.pk, "approved_by_admin": True},
+            )
+            self.assertEqual(response.data, expected)
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+            # Note: set_cloudwatch_log_retention is not called because we disconnected the signals
+            # to prevent side effects. The test focuses on the view functionality.
+
+        finally:
+            # Reconnect the post_save signals
+            for receiver in post_save_receivers:
+                post_save.connect(receiver, sender=Challenge)
 
     def test_update_challenge_approval_when_not_a_staff(self):
         self.url = reverse_lazy("challenges:update_challenge_approval")
@@ -6589,28 +6618,56 @@ def setUp(self):
         settings.AWS_SES_REGION_ENDPOINT = "email.us-east-1.amazonaws.com"
         return super().setUp()
 
-    def test_update_challenge_attributes_when_challenge_exists(self):
-        self.url = reverse_lazy("challenges:update_challenge_attributes")
-        self.user.is_staff = True
-        self.user.save()
+    @mock.patch("challenges.aws_utils.set_cloudwatch_log_retention")
+    def test_update_challenge_attributes_when_challenge_exists(
+        self, mock_set_log_retention
+    ):
+        from challenges.models import Challenge
+        from django.db.models.signals import post_save
 
-        expected = {
-            "message": f"Challenge attributes updated successfully for challenge with primary key {self.challenge.pk}!"
-        }
+        # Temporarily disconnect post_save signals to prevent side effects
+        post_save_receivers = []
+        for receiver in post_save._live_receivers(sender=Challenge):
+            post_save_receivers.append(receiver)
+            post_save.disconnect(receiver, sender=Challenge)
 
-        response = self.client.post(
-            self.url,
-            {
-                "challenge_pk": self.challenge.pk,
-                "title": "Updated Title",
-                "description": "Updated Description",
-                "approved_by_admin": True,
-                "ephemeral_storage": 25,
-            },
-        )
+        try:
+            self.url = reverse_lazy("challenges:update_challenge_attributes")
+            self.user.is_staff = True
+            self.user.save()
+
+            # Mock the log retention function to return success
+            mock_set_log_retention.return_value = {
+                "success": True,
+                "retention_days": 30,
+                "message": "Retention policy set successfully",
+            }
 
-        self.assertEqual(response.data, expected)
-        self.assertEqual(response.status_code, status.HTTP_200_OK)
+            expected = {
+                "message": f"Challenge attributes updated successfully for challenge with primary key {self.challenge.pk}!"
+            }
+
+            response = self.client.post(
+                self.url,
+                {
+                    "challenge_pk": self.challenge.pk,
+                    "title": "Updated Title",
+                    "description": "Updated Description",
+                    "approved_by_admin": True,
+                    "ephemeral_storage": 25,
+                },
+            )
+
+            self.assertEqual(response.data, expected)
+            self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+            # Note: set_cloudwatch_log_retention is not called because we disconnected the signals
+            # to prevent side effects. The test focuses on the view functionality.
+
+        finally:
+            # Reconnect the post_save signals
+            for receiver in post_save_receivers:
+                post_save.connect(receiver, sender=Challenge)
 
     def test_update_challenge_attributes_when_not_a_staff(self):
         self.url = reverse_lazy("challenges:update_challenge_attributes")
@@ -6633,3 +6690,92 @@ def test_update_challenge_attributes_when_not_a_staff(self):
 
         self.assertEqual(response.data, expected)
         self.assertEqual(response.status_code, status.HTTP_401_UNAUTHORIZED)
+
+
+class TestRetentionConsentAPI(BaseAPITestClass):
+    def setUp(self):
+        super().setUp()
+
+    def test_get_retention_consent_status(self):
+        url = reverse_lazy(
+            "challenges:get_retention_consent_status",
+            kwargs={"challenge_pk": self.challenge.pk},
+        )
+        response = self.client.get(url)
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.assertIn("has_consent", response.data)
+        self.assertIn("is_host", response.data)
+        self.assertIn("can_provide_consent", response.data)
+        # consent_by and consent_date are only included when has_consent is True
+        self.assertFalse(response.data["has_consent"])
+
+    def test_get_retention_consent_status_not_found(self):
+        url = reverse_lazy(
+            "challenges:get_retention_consent_status",
+            kwargs={"challenge_pk": 99999},
+        )
+        response = self.client.get(url)
+        self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
+
+    def test_provide_retention_consent(self):
+        url = reverse_lazy(
+            "challenges:update_retention_consent",
+            kwargs={"challenge_pk": self.challenge.pk},
+        )
+        data = {"consent": True}
+        response = self.client.post(url, data)
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.challenge.refresh_from_db()
+        self.assertTrue(self.challenge.retention_policy_consent)
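+
+    def test_provide_retention_consent_records_consenting_user(self):
+        # Editor's sketch (hedged, not part of the original changeset):
+        # assumes the update_retention_consent view stamps
+        # retention_policy_consent_by and retention_policy_consent_date
+        # when consent is granted.
+        url = reverse_lazy(
+            "challenges:update_retention_consent",
+            kwargs={"challenge_pk": self.challenge.pk},
+        )
+        response = self.client.post(url, {"consent": True})
+        self.assertEqual(response.status_code, status.HTTP_200_OK)
+        self.challenge.refresh_from_db()
+        self.assertEqual(
+            self.challenge.retention_policy_consent_by, self.user
+        )
+        self.assertIsNotNone(self.challenge.retention_policy_consent_date)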
kwargs={"challenge_pk": self.challenge.pk}, + ) + data = {"consent": False} + response = self.client.post(url, data) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.challenge.refresh_from_db() + self.assertFalse(self.challenge.retention_policy_consent) + + def test_retention_consent_unauthorized(self): + other_user = User.objects.create( + username="otheruser", + email="other@test.com", + password="secret_password", + ) + self.client.force_authenticate(user=other_user) + url = reverse_lazy( + "challenges:update_retention_consent", + kwargs={"challenge_pk": self.challenge.pk}, + ) + data = {"consent": True} + response = self.client.post(url, data) + self.assertEqual(response.status_code, status.HTTP_403_FORBIDDEN) + + def test_retention_consent_invalid_data(self): + url = reverse_lazy( + "challenges:update_retention_consent", + kwargs={"challenge_pk": self.challenge.pk}, + ) + data = {"invalid_field": True} + response = self.client.post(url, data) + self.assertEqual(response.status_code, status.HTTP_400_BAD_REQUEST) + + def test_retention_consent_with_notes(self): + url = reverse_lazy( + "challenges:update_retention_consent", + kwargs={"challenge_pk": self.challenge.pk}, + ) + data = {"consent": True, "notes": "Test consent notes"} + response = self.client.post(url, data) + self.assertEqual(response.status_code, status.HTTP_200_OK) + self.challenge.refresh_from_db() + self.assertEqual( + self.challenge.retention_policy_notes, "Test consent notes" + ) diff --git a/tests/unit/jobs/test_models.py b/tests/unit/jobs/test_models.py index d41bbe871b..a945f6c5f1 100644 --- a/tests/unit/jobs/test_models.py +++ b/tests/unit/jobs/test_models.py @@ -4,6 +4,7 @@ import pytest import rest_framework +from challenges.aws_utils import calculate_submission_retention_date from challenges.models import Challenge, ChallengePhase from django.contrib.auth.models import User from django.core.files.uploadedfile import SimpleUploadedFile @@ -217,3 +218,86 @@ def test_max_submissions_per_month_reached(self): is_public=True, submitted_at=timezone.now().replace(day=3), ) + + +class SubmissionRetentionModelTests(TestCase): + + def setUp(self): + """Set up test data""" + from django.contrib.auth.models import User + from hosts.models import ChallengeHostTeam + + self.user = User.objects.create( + username="hostuser", email="host@test.com", password="password" + ) + self.challenge_host_team = ChallengeHostTeam.objects.create( + team_name="Test Host Team", created_by=self.user + ) + self.challenge = Challenge.objects.create( + title="Test Challenge", + start_date=timezone.now() - timedelta(days=30), + end_date=timezone.now() + timedelta(days=30), + creator=self.challenge_host_team, + retention_policy_consent=True, # Ensure consent is given + ) + + self.challenge_phase = ChallengePhase.objects.create( + challenge=self.challenge, + name="Test Phase", + start_date=timezone.now() - timedelta(days=20), + end_date=timezone.now() + timedelta(days=10), + is_public=False, + ) + + def test_retention_date_calculation_basic(self): + """Test basic retention date calculation""" + retention_date = calculate_submission_retention_date( + self.challenge_phase + ) + expected_date = self.challenge_phase.end_date + timedelta(days=30) + self.assertEqual(retention_date, expected_date) + + def test_retention_date_public_phase(self): + """Test that public phases don't trigger retention""" + self.challenge_phase.is_public = True + self.challenge_phase.save() + + retention_date = calculate_submission_retention_date( + 
+
+    def test_retention_date_public_phase(self):
+        """Test that public phases don't trigger retention"""
+        self.challenge_phase.is_public = True
+        self.challenge_phase.save()
+
+        retention_date = calculate_submission_retention_date(
+            self.challenge_phase
+        )
+        self.assertIsNone(retention_date)
+
+    def test_retention_date_no_end_date(self):
+        """Test phases without end dates"""
+        self.challenge_phase.end_date = None
+        self.challenge_phase.save()
+
+        retention_date = calculate_submission_retention_date(
+            self.challenge_phase
+        )
+        self.assertIsNone(retention_date)
+
+    def test_submission_retention_fields_default(self):
+        """Test default values for retention fields"""
+        from participants.models import ParticipantTeam
+
+        user = User.objects.create_user(
+            username="participantuser",
+            email="participant@test.com",
+            password="password",
+        )
+        participant_team = ParticipantTeam.objects.create(
+            team_name="Test Participant Team", created_by=user
+        )
+        submission = Submission.objects.create(
+            challenge_phase=self.challenge_phase,
+            participant_team=participant_team,
+            created_by=user,
+            status="submitted",
+            is_public=True,
+        )
+
+        self.assertIsNone(submission.retention_eligible_date)
+        self.assertFalse(submission.is_artifact_deleted)
+        self.assertIsNone(submission.artifact_deletion_date)
diff --git a/tests/unit/worker/test_remote_submission_worker.py b/tests/unit/worker/test_remote_submission_worker.py
index 71ef725029..6407307415 100644
--- a/tests/unit/worker/test_remote_submission_worker.py
+++ b/tests/unit/worker/test_remote_submission_worker.py
@@ -207,8 +207,12 @@ class TestExtractChallengeData(unittest.TestCase):
     @patch(
         "scripts.workers.remote_submission_worker.create_dir_as_python_package"
     )
+    @patch(
+        "scripts.workers.remote_submission_worker.download_and_extract_zip_file"
+    )
     def test_extract_challenge_data_import_exception(
         self,
+        mock_download_zip,
         mock_create_dir_as_pkg,
         mock_create_dir,
         mock_import_module,
@@ -223,6 +227,10 @@ def test_extract_challenge_data_import_exception(
             {"id": 1, "test_annotation": "http://example.com/annotation.txt"}
         ]
 
+        # Mock the download functions to prevent actual HTTP requests
+        mock_download_zip.return_value = None
+        mock_download_file.return_value = None
+
         # Simulate an exception during import
         mock_import_module.side_effect = ImportError("Import failed")
@@ -238,6 +246,7 @@ def test_extract_challenge_data_import_exception(
             os.path.join(CHALLENGE_DATA_BASE_DIR, "challenge_1")
         )
 
+        mock_download_zip.assert_called_once()
         mock_download_file.assert_called_once()
 
     @patch(