Skip to content

Commit 10069e4

Browse files
authored
Merge pull request #4262 from GSA-TTS/main
2 parents 5379731 + 5dc66d2 commit 10069e4

File tree

5 files changed

+428
-117
lines changed

5 files changed

+428
-117
lines changed

backend/cypress/support/general-info.js

+2-2
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ export function testValidGeneralInfo() {
2727
cy.get('#auditee_contact_name').type('John Doe');
2828
cy.get('#auditee_contact_title').type('Keymaster');
2929
cy.get('#auditee_phone').type('5558675309');
30-
cy.get('#auditee_email').type('va@test');
30+
cy.get('#auditee_email').type('va@test.com');
3131

3232
// Auditor information
3333
cy.get('#auditor_ein').type('987654321');
@@ -44,7 +44,7 @@ export function testValidGeneralInfo() {
4444
cy.get('#auditor_contact_name').type('Jane Doe');
4545
cy.get('#auditor_contact_title').type('Auditor');
4646
cy.get('#auditor_phone').type('5555555555');
47-
cy.get('#auditor_email').type('qualified.human.accountant@auditor');
47+
cy.get('#auditor_email').type('qualified.human.accountant@auditor.com');
4848

4949
cy.get('label[for=secondary_auditors-yes]').click();
5050

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
from django.core.management.base import BaseCommand
2+
from dissemination.remove_workbook_artifacts import delete_workbooks
3+
4+
import logging
5+
6+
7+
logger = logging.getLogger(__name__)
8+
9+
10+
class Command(BaseCommand):
    """Management command that deletes workbook artifacts for one partition
    of the disseminated reports, so the total workload can be split across
    several parallel invocations."""

    help = "Delete workbook artifacts for a specific partition of disseminated reports."

    def add_arguments(self, parser):
        """Register the partitioning and paging options (all integers)."""
        # (flag, required, default, help text)
        option_specs = (
            (
                "--partition_number",
                True,
                None,
                "The partition number to process (e.g., 1, 2, 3).",
            ),
            (
                "--total_partitions",
                True,
                None,
                "The total number of partitions (e.g., 4 if splitting the load into four parts).",
            ),
            ("--page_size", False, 10, "Number of items to process per page"),
            ("--pages", False, None, "Maximum number of pages to process"),
        )
        for flag, required, default, help_text in option_specs:
            if required:
                parser.add_argument(flag, type=int, required=True, help=help_text)
            else:
                parser.add_argument(
                    flag, type=int, required=False, default=default, help=help_text
                )

    def handle(self, *args, **options):
        """Entry point: announce the partition being processed, then delegate
        the actual deletion work to delete_workbooks()."""
        partition = options["partition_number"]
        total = options["total_partitions"]

        self.stdout.write(
            self.style.SUCCESS(f"Processing partition {partition} of {total}")
        )
        delete_workbooks(
            partition,
            total,
            page_size=options["page_size"],
            pages=options["pages"],
        )
Original file line numberDiff line numberDiff line change
@@ -1,67 +1,216 @@
1-
import logging
2-
3-
from django.conf import settings
4-
from audit.models.models import ExcelFile
5-
from boto3 import client as boto3_client
6-
from botocore.client import ClientError, Config
7-
8-
logger = logging.getLogger(__name__)
9-
10-
11-
def remove_workbook_artifacts(sac):
12-
"""
13-
Remove all workbook artifacts associated with the given sac.
14-
"""
15-
try:
16-
excel_files = ExcelFile.objects.filter(sac=sac)
17-
files = [f"excel/{excel_file.filename}" for excel_file in excel_files]
18-
19-
if files:
20-
# Delete the files from S3 in bulk
21-
delete_files_in_bulk(files, sac)
22-
23-
except ExcelFile.DoesNotExist:
24-
logger.info(f"No files found to delete for report: {sac.report_id}")
25-
except Exception as e:
26-
logger.error(
27-
f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
28-
)
29-
30-
31-
def delete_files_in_bulk(filenames, sac):
32-
"""Delete files from S3 in bulk."""
33-
# This client uses the internal endpoint URL because we're making a request to S3 from within the app
34-
s3_client = boto3_client(
35-
service_name="s3",
36-
region_name=settings.AWS_S3_PRIVATE_REGION_NAME,
37-
aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID,
38-
aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY,
39-
endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT,
40-
config=Config(signature_version="s3v4"),
41-
)
42-
43-
try:
44-
delete_objects = [{"Key": filename} for filename in filenames]
45-
46-
response = s3_client.delete_objects(
47-
Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME,
48-
Delete={"Objects": delete_objects},
49-
)
50-
51-
deleted_files = response.get("Deleted", [])
52-
for deleted in deleted_files:
53-
logger.info(
54-
f"Successfully deleted {deleted['Key']} from S3 for report: {sac.report_id}"
55-
)
56-
57-
errors = response.get("Errors", [])
58-
if errors:
59-
for error in errors:
60-
logger.error(
61-
f"Failed to delete {error['Key']} from S3 for report: {sac.report_id}. Error: {error['Message']}" # nosec B608
62-
)
63-
64-
except ClientError as e:
65-
logger.error(
66-
f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
67-
)
1+
import logging
2+
import math
3+
4+
from django.conf import settings
5+
from audit.models.models import ExcelFile, SingleAuditChecklist
6+
from boto3 import client as boto3_client
7+
from botocore.client import ClientError, Config
8+
from django.core.paginator import Paginator
9+
from django.core.paginator import PageNotAnInteger, EmptyPage
10+
11+
12+
logger = logging.getLogger(__name__)
13+
14+
15+
def remove_workbook_artifacts(sac):
    """
    Remove all workbook artifacts associated with the given sac.

    Looks up every ExcelFile record attached to the report and deletes the
    corresponding "excel/<filename>" objects from S3 in one bulk request.
    Errors are logged, never raised, so callers are not interrupted.
    """
    try:
        excel_files = ExcelFile.objects.filter(sac=sac)
        files = [f"excel/{excel_file.filename}" for excel_file in excel_files]

        if files:
            # Delete the files from S3 in bulk
            delete_files_in_bulk(files, sac)
        else:
            # NOTE: .filter() returns a (possibly empty) queryset and never
            # raises DoesNotExist, so the empty case is handled here rather
            # than in an unreachable `except ExcelFile.DoesNotExist` branch.
            logger.info(f"No files found to delete for report: {sac.report_id}")

    except Exception as e:
        logger.error(
            f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
        )
33+
34+
35+
def delete_files_in_bulk(filenames, sac):
    """Delete files from S3 in bulk.

    Args:
        filenames: list of S3 object keys (e.g. "excel/<name>") to remove.
        sac: the report whose report_id is used in log messages.

    The S3 DeleteObjects API accepts at most 1000 keys per request, so the
    keys are deleted in chunks. Per-object outcomes are logged; a transport
    level failure is logged and swallowed so callers are not interrupted.
    """
    # This client uses the internal endpoint URL because we're making a request to S3 from within the app
    s3_client = boto3_client(
        service_name="s3",
        region_name=settings.AWS_S3_PRIVATE_REGION_NAME,
        aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY,
        endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT,
        config=Config(signature_version="s3v4"),
    )

    # Hard limit of the S3 DeleteObjects API; larger requests are rejected.
    max_keys_per_request = 1000

    try:
        for start in range(0, len(filenames), max_keys_per_request):
            chunk = filenames[start : start + max_keys_per_request]
            delete_objects = [{"Key": filename} for filename in chunk]

            response = s3_client.delete_objects(
                Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME,
                Delete={"Objects": delete_objects},
            )

            deleted_files = response.get("Deleted", [])
            for deleted in deleted_files:
                logger.info(
                    f"Successfully deleted {deleted['Key']} from S3 for report: {sac.report_id}"
                )

            errors = response.get("Errors", [])
            if errors:
                for error in errors:
                    logger.error(
                        f"Failed to delete {error['Key']} from S3 for report: {sac.report_id}. Error: {error['Message']}"  # nosec B608
                    )

    except ClientError as e:
        logger.error(
            f"Failed to delete files from S3 for report: {sac.report_id}. Error: {e}"
        )
)
72+
73+
74+
def clean_artifacts(sac_list):
    """
    Perform necessary cleanup associated with the given list of sac values.
    """
    try:
        excel_files = ExcelFile.objects.filter(sac__in=sac_list)
        files = [f"excel/{excel_file.filename}" for excel_file in excel_files]
        if not files:
            return

        logger.info(
            f"Found {len(files)} ExcelFile records for reports: {[sac.report_id for sac in sac_list]}"
        )

        # Map each S3 key back to its owning report id for result tracking.
        key_to_report = {
            f"excel/{excel_file.filename}": excel_file.sac.report_id
            for excel_file in excel_files
        }

        # Track results but do not delete the ExcelFile records from the database
        successful, failed = batch_removal(files, sac_list, key_to_report)

        if failed:
            logger.error(f"Failed to delete the following files from S3: {failed}")
        if successful:
            logger.info(
                f"Successfully deleted the following files from S3: {successful}"
            )

    except Exception as e:
        logger.error(f"Failed to process files for the provided sac values. Error: {e}")
108+
109+
110+
def batch_removal(filenames, sac_list, sac_to_report_id_map):
    """Delete files from S3 in bulk and return the results."""
    # Internal endpoint: the request to S3 originates from inside the app.
    s3_client = boto3_client(
        service_name="s3",
        region_name=settings.AWS_S3_PRIVATE_REGION_NAME,
        aws_access_key_id=settings.AWS_PRIVATE_ACCESS_KEY_ID,
        aws_secret_access_key=settings.AWS_PRIVATE_SECRET_ACCESS_KEY,
        endpoint_url=settings.AWS_S3_PRIVATE_INTERNAL_ENDPOINT,
        config=Config(signature_version="s3v4"),
    )

    try:
        response = s3_client.delete_objects(
            Bucket=settings.AWS_PRIVATE_STORAGE_BUCKET_NAME,
            Delete={"Objects": [{"Key": name} for name in filenames]},
        )

        # Pair each per-object outcome with the report it belonged to.
        successful_deletes = [
            {
                "filename": entry["Key"],
                "sac_report_id": sac_to_report_id_map[entry["Key"]],
            }
            for entry in response.get("Deleted", [])
        ]
        failed_deletes = [
            {
                "filename": entry["Key"],
                "sac_report_id": sac_to_report_id_map[entry["Key"]],
                "error_message": entry["Message"],
            }
            for entry in response.get("Errors", [])
        ]

        return successful_deletes, failed_deletes

    except ClientError as e:
        logger.error(
            f"Failed to delete files from S3 for sac values: {[sac.report_id for sac in sac_list]}. Error: {e}"
        )
        return [], [{"error_message": str(e)}]
    except Exception as e:
        logger.error(f"Failed to delete files from S3. Error: {e}")
        return [], [{"error_message": str(e)}]
162+
163+
164+
def delete_workbooks(partition_number, total_partitions, page_size=10, pages=None):
    """Iterates over disseminated reports for the specified partition.

    Splits the full id-ordered set of disseminated reports into
    ``total_partitions`` contiguous slices, then walks the slice belonging
    to ``partition_number`` page by page, calling clean_artifacts() on each
    page of reports.

    Args:
        partition_number: 1-based index of the slice to process.
        total_partitions: total number of slices the workload is split into.
        page_size: number of reports handed to clean_artifacts() per batch.
        pages: optional cap on how many pages to process (None = all).

    Raises:
        ValueError: if partition_number is outside [1, total_partitions].
    """

    if partition_number < 1 or partition_number > total_partitions:
        raise ValueError(
            "Invalid partition number. It must be between 1 and the total number of partitions."
        )

    # Stable id ordering so every partition sees the same global sequence.
    all_ids = (
        SingleAuditChecklist.objects.filter(
            submission_status=SingleAuditChecklist.STATUS.DISSEMINATED
        )
        .values_list("id", flat=True)
        .order_by("id")
    )

    # len() evaluates the queryset; the count is needed to size partitions.
    total_ids = len(all_ids)
    ids_per_partition = math.ceil(total_ids / total_partitions)

    # Half-open slice [start_index, end_index) for this partition; the last
    # partition may be shorter, hence the min().
    start_index = (partition_number - 1) * ids_per_partition
    end_index = min(partition_number * ids_per_partition, total_ids)

    ids_to_process = all_ids[start_index:end_index]

    sacs = SingleAuditChecklist.objects.filter(id__in=ids_to_process).order_by("id")

    paginator = Paginator(sacs, page_size)
    # Honor the optional page cap without exceeding the real page count.
    total_pages = (
        paginator.num_pages if pages is None else min(pages, paginator.num_pages)
    )

    logger.info(
        f"Retrieving {sacs.count()} reports for partition {partition_number} of {total_partitions}"
    )

    for page_number in range(1, total_pages + 1):
        try:
            page = paginator.page(page_number)
            logger.info(
                f"Processing page {page_number} with {page.object_list.count()} reports."
            )

            # Extract sac values from the current page
            sac_list = list(page.object_list)
            clean_artifacts(sac_list)

        # NOTE(review): page_number comes from range(), so this branch looks
        # unreachable in practice — kept for Paginator API completeness.
        except PageNotAnInteger:
            logger.error(f"Page number {page_number} is not an integer.")
        except EmptyPage:
            logger.info(f"No more pages to process after page {page_number}.")
            break
        except Exception as e:
            logger.error(f"An error occurred while processing page {page_number}: {e}")

0 commit comments

Comments
 (0)