Skip to content

Commit

Permalink
Merge pull request #3073 from GSA-TTS/main
Browse files Browse the repository at this point in the history
  • Loading branch information
jadudm authored Dec 22, 2023
2 parents d79e626 + 67b29fb commit 4b663fd
Show file tree
Hide file tree
Showing 20 changed files with 208 additions and 97 deletions.
6 changes: 6 additions & 0 deletions backend/audit/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
ExcelFile,
SingleAuditChecklist,
SingleAuditReportFile,
SubmissionEvent,
)


Expand Down Expand Up @@ -78,8 +79,13 @@ class AuditReportAdmin(admin.ModelAdmin):
list_display = ("filename", "user", "date_created", "component_page_numbers")


class SubmissionEventAdmin(admin.ModelAdmin):
    """Admin changelist configuration for SubmissionEvent records."""

    # Columns shown in the admin list view for submission events.
    list_display = ("sac", "user", "timestamp", "event")


# Register each audit model with its ModelAdmin so they are
# manageable through the Django admin site.
admin.site.register(Access, AccessAdmin)
admin.site.register(DeletedAccess, DeletedAccessAdmin)
admin.site.register(ExcelFile, ExcelFileAdmin)
admin.site.register(SingleAuditChecklist, SACAdmin)
admin.site.register(SingleAuditReportFile, AuditReportAdmin)
admin.site.register(SubmissionEvent, SubmissionEventAdmin)
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,13 +1,11 @@
import logging
import re

from django.conf import settings
from audit.intakelib.intermediate_representation import (
get_range_values_by_name,
get_range_by_name,
)
from .check_aln_three_digit_extension_pattern import (
REGEX_RD_EXTENSION,
REGEX_U_EXTENSION,
)
from audit.intakelib.common import get_message, build_cell_error_tuple

logger = logging.getLogger(__name__)
Expand All @@ -19,7 +17,7 @@ def additional_award_identification(ir):
extension = get_range_values_by_name(ir, "three_digit_extension")
additional = get_range_values_by_name(ir, "additional_award_identification")
errors = []
patterns = [REGEX_RD_EXTENSION, REGEX_U_EXTENSION]
patterns = [settings.REGEX_RD_EXTENSION, settings.REGEX_U_EXTENSION]
for index, (ext, add) in enumerate(zip(extension, additional)):
if any(re.match(pattern, ext) for pattern in patterns) and (
(add is None) or (str(add).strip() == "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,12 +10,6 @@

logger = logging.getLogger(__name__)

# A version of these regexes also exists in Base.libsonnet
REGEX_RD_EXTENSION = r"^RD[0-9]?$"
REGEX_THREE_DIGIT_EXTENSION = r"^[0-9]{3}[A-Za-z]{0,1}$"
REGEX_U_EXTENSION = r"^U[0-9]{2}$"
REGEX_GSA_MIGRATION = rf"^{re.escape(settings.GSA_MIGRATION)}$"


# DESCRIPTION
# The three digit extension should follow one of these formats: ###, RD#, or U##, where # represents a number
Expand All @@ -26,10 +20,10 @@ def aln_three_digit_extension(ir):
errors = []
# Define regex patterns
patterns = [
REGEX_RD_EXTENSION,
REGEX_THREE_DIGIT_EXTENSION,
REGEX_U_EXTENSION,
REGEX_GSA_MIGRATION,
settings.REGEX_RD_EXTENSION,
settings.REGEX_THREE_DIGIT_EXTENSION,
settings.REGEX_U_EXTENSION,
rf"^{re.escape(settings.GSA_MIGRATION)}$",
]
for index, ext in enumerate(extension):
# Check if ext does not match any of the regex patterns
Expand Down
7 changes: 6 additions & 1 deletion backend/audit/intakelib/checks/check_cluster_names.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@

logger = logging.getLogger(__name__)

# Special cluster-name values accepted in addition to the names
# listed in ClusterNames.json.
STATE_CLUSTER = "STATE CLUSTER"
OTHER_CLUSTER = "OTHER CLUSTER NOT LISTED ABOVE"


def check_cluster_names(ir):
"""Check that cluster names are valid when present."""
Expand All @@ -25,7 +28,9 @@ def check_cluster_names(ir):
raise ValueError("ClusterNames.json file contains invalid JSON.")

for index, value in enumerate(range_data["values"]):
if value and value not in valid_json["cluster_names"]:
if value and value not in (
valid_json["cluster_names"] + [STATE_CLUSTER, OTHER_CLUSTER]
):
errors.append(
build_cell_error_tuple(
ir,
Expand Down
11 changes: 11 additions & 0 deletions backend/audit/models/access.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from django.contrib.auth import get_user_model
from django.contrib.auth.models import User as DjangoUser
from django.core.exceptions import MultipleObjectsReturned
from django.db import models
from django.db.models import Q
from .access_roles import ACCESS_ROLES
Expand Down Expand Up @@ -28,11 +29,21 @@ def create(self, **obj_data):
event_user = obj_data.pop("event_user", None)
event_type = obj_data.pop("event_type", None)

# try to pair this Access with an actual User object if we have one for this email address
if obj_data["email"]:
try:
acc_user = User.objects.get(email=obj_data["email"])
# if we don't have a User for this email, leave it as None (unclaimed Access)
except User.DoesNotExist:
acc_user = None
# if we have multiple Users for this email, leave it as None
# this typically happens if a user deletes their Login.gov account
# and creates a new one using the same email address.
# In this case we want to defer assigning this Access to a specific
# User until the next time they login to the FAC, because we don't
# yet know which of their User accounts is the "active" one
except MultipleObjectsReturned:
acc_user = None
if acc_user:
obj_data["user"] = acc_user
result = super().create(**obj_data)
Expand Down
28 changes: 28 additions & 0 deletions backend/audit/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
LateChangeError,
SingleAuditChecklist,
SingleAuditReportFile,
SubmissionEvent,
User,
generate_sac_report_id,
)
Expand Down Expand Up @@ -195,6 +196,33 @@ def test_multiple_certifying_auditor_contact_not_allowed(self):
role="certifying_auditor_contact",
)

def test_access_creation_non_unique_emails(self):
    """
    An Access created for an email address shared by several User
    objects must remain unclaimed (user is None). The next time the
    user logs into the FAC, the Access can then be claimed by
    whichever of their User accounts is the "active" one.
    """
    granting_user = baker.make(User)

    # Two distinct Users sharing the same email address.
    baker.make(User, email="[email protected]")
    baker.make(User, email="[email protected]")

    checklist = baker.make(SingleAuditChecklist)

    created_access = Access.objects.create(
        sac=checklist,
        role="editor",
        email="[email protected]",
        event_user=granting_user,
        event_type=SubmissionEvent.EventType.ACCESS_GRANTED,
    )

    self.assertEqual(created_access.email, "[email protected]")
    self.assertIsNone(created_access.user)


class ExcelFileTests(TestCase):
"""Model tests"""
Expand Down
22 changes: 12 additions & 10 deletions backend/census_historical_migration/historic_data_loader.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import logging
from .models import ELECAUDITHEADER as AuditHeader
from .workbooklib.end_to_end_core import run_end_to_end

from django.contrib.auth import get_user_model
from django.core.paginator import Paginator

logger = logging.getLogger(__name__)

User = get_user_model()

Expand All @@ -18,11 +20,11 @@ def load_historic_data_for_year(audit_year, page_size, pages):
)
paginator = Paginator(submissions_for_year, page_size)

print(f"{submissions_for_year.count()} submissions found for {audit_year}")
logger.info(f"{submissions_for_year.count()} submissions found for {audit_year}")

for page_number in pages:
page = paginator.page(page_number)
print(
logger.info(
f"Processing page {page_number} with {page.object_list.count()} submissions."
)

Expand All @@ -38,21 +40,21 @@ def load_historic_data_for_year(audit_year, page_size, pages):
if has_failed:
error_count += 1
if total_count % 5 == 0:
print(f"Processed = {total_count}, Errors = {error_count}")
logger.info(f"Processed = {total_count}, Errors = {error_count}")

print_results(result_log, error_count, total_count)
log_results(result_log, error_count, total_count)


def print_results(result_log, error_count, total_count):
def log_results(result_log, error_count, total_count):
    """Log a summary of the migration results.

    Args:
        result_log: mapping of submission key -> migration result.
        error_count: number of submissions that failed.
        total_count: total number of submissions processed.
    """

    logger.info("********* Loader Summary ***************")

    for k, v in result_log.items():
        # Use lazy %-style arguments: `logger.info(k, v)` would treat `k`
        # as the format string and `v` as a stray positional argument,
        # producing a logging formatting error instead of output.
        logger.info("%s %s", k, v)
        logger.info("-------------------")

    logger.info("%s errors out of %s", error_count, total_count)


def create_or_get_user():
Expand All @@ -65,7 +67,7 @@ def create_or_get_user():
if users:
user = users.first()
else:
print("Creating user", user_email, user_name)
logger.info("Creating user", user_email, user_name)
user = User(username=user_name, email=user_email)
user.save()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
from collections import defaultdict

logger = logging.getLogger(__name__)
logger.setLevel(logging.WARNING)

census_to_gsafac_models = list(
apps.get_app_config("census_historical_migration").get_models()
)
Expand Down Expand Up @@ -62,7 +62,7 @@ def handle(self, *args, **options):
return
folder = options.get("folder")
if not folder:
print("Please specify a folder name")
logger.error("Please specify a folder name")
return
chunk_size = options.get("chunksize")
self.process_csv_files(folder, chunk_size)
Expand All @@ -87,20 +87,20 @@ def process_csv_files(self, folder, chunk_size):
def display_row_counts(self, models):
    """Log the current database row count for each of the given models."""
    for mdl in models:
        row_count = mdl.objects.all().count()
        # `logger.info(f"{row_count} in ", mdl)` passed `mdl` as an extra
        # positional argument with no placeholder in the format string,
        # which raises a logging formatting error; use lazy %-args instead.
        logger.info("%s in %s", row_count, mdl)

def delete_data(self):
    """Delete all rows from every census-to-gsafac model."""
    for mdl in census_to_gsafac_models:
        # `logger.info("Deleting ", mdl)` passed `mdl` as a stray positional
        # argument with no placeholder; use lazy %-formatting instead.
        logger.info("Deleting %s", mdl)
        mdl.objects.all().delete()

def sample_data(self):
    """Log one sample row (every field) from each census-to-gsafac model."""
    for mdl in census_to_gsafac_models:
        # `logger.info("Sampling ", mdl)` passed `mdl` as a stray positional
        # argument with no placeholder; use lazy %-formatting instead.
        logger.info("Sampling %s", mdl)
        rows = mdl.objects.all()[:1]
        for row in rows:
            for col in mdl._meta.fields:
                logger.info("%s: %s", col.name, getattr(row, col.name))

def list_s3_objects(self, bucket_name, folder):
    """Return the S3 object listing under `folder` in `bucket_name`.

    NOTE(review): indexing "Contents" assumes the prefix matches at least
    one object; an empty listing would raise KeyError — confirm callers
    only pass folders known to contain files.
    """
    return s3_client.list_objects(Bucket=bucket_name, Prefix=folder)["Contents"]
Expand All @@ -112,22 +112,22 @@ def get_s3_object(self, bucket_name, key, model_obj):
except ClientError:
logger.error("Could not download {}".format(model_obj))
return None
print(f"Obtained {model_obj} from S3")
logger.info(f"Obtained {model_obj} from S3")
return file

def get_model_name(self, name):
    """Return the census model name matching an S3 key, or None.

    The key's basename (minus extension), lowercased, must start with one
    of the known census_to_gsafac model names.
    """
    # The original calls (e.g. `logger.info("Processing ", name)`) passed a
    # stray positional argument with no placeholder in the format string,
    # which raises a logging formatting error; use lazy %-args throughout.
    logger.info("Processing %s", name)
    file_name = name.split("/")[-1].split(".")[0]
    for model_name in census_to_gsafac_model_names:
        if file_name.lower().startswith(model_name):
            logger.info("model_name = %s", model_name)
            return model_name
    logger.error("Could not find a matching model for %s", name)
    return None

def load_data(self, file, model_obj, chunk_size):
dtypes = defaultdict(lambda: str)
print("Starting load data to postgres")
logger.info("Starting load data to postgres")
file.seek(0)
rows_loaded = 0
for df in pd.read_csv(file, iterator=True, chunksize=chunk_size, dtype=dtypes):
Expand All @@ -139,5 +139,5 @@ def load_data(self, file, model_obj, chunk_size):
obj = model_obj(**row)
obj.save()
rows_loaded += df.shape[0]
print(f"Loaded {rows_loaded} rows in ", model_obj)
logger.info(f"Loaded {rows_loaded} rows in ", model_obj)
return None
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
)
from census_historical_migration.historic_data_loader import (
create_or_get_user,
print_results,
log_results,
)
from census_historical_migration.workbooklib.end_to_end_core import run_end_to_end
from django.conf import settings
Expand Down Expand Up @@ -60,7 +60,7 @@ def initiate_migration(self, dbkeys_str, years_str):
result_log[(year, dbkey)] = result
total_count += 1

print_results(result_log, error_count, total_count)
log_results(result_log, error_count, total_count)

def handle(self, *args, **options):
dbkeys_str = options["dbkeys"]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,19 @@
from django.conf import settings

from ..exception_utils import DataMigrationError
from ..sac_general_lib.general_information import (
from .general_information import (
general_information,
)
from ..sac_general_lib.audit_information import (
from .audit_information import (
audit_information,
)
from ..sac_general_lib.auditee_certification import (
from .auditee_certification import (
auditee_certification,
)
from ..sac_general_lib.auditor_certification import (
from .auditor_certification import (
auditor_certification,
)
from ..sac_general_lib.report_id_generator import (
from .report_id_generator import (
xform_dbkey_to_report_id,
)

Expand All @@ -43,10 +43,13 @@ def setup_sac(user, audit_header):
if exists:
exists.delete()

general_info = general_information(audit_header)

sac = SingleAuditChecklist.objects.create(
submitted_by=user,
general_information=general_information(audit_header),
general_information=general_info,
audit_information=audit_information(audit_header),
audit_type=general_info["audit_type"],
)

sac.report_id = generated_report_id
Expand Down
Loading

0 comments on commit 4b663fd

Please sign in to comment.