Skip to content

Commit

Permalink
Initial commit of JSON-LD import backend (etl module) #10798
Browse files Browse the repository at this point in the history
  • Loading branch information
jacobtylerwalls committed Apr 30, 2024
1 parent 5fcd698 commit f10f02d
Show file tree
Hide file tree
Showing 3 changed files with 92 additions and 8 deletions.
23 changes: 16 additions & 7 deletions arches/app/etl_modules/base_import_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from django.utils.decorators import method_decorator
from django.db import connection

from arches.app.etl_modules.decorators import load_data_async
from arches.app.etl_modules.save import save_to_tiles
from arches.app.models.models import Node
from arches.app.models.system_settings import settings
Expand Down Expand Up @@ -189,7 +190,19 @@ def run_load_task(self, userid, files, summary, result, temp_dir, loadid):
result["summary"] = summary
return {"success": result["validation"]["success"], "data": result}

def validate_uploaded_file(self, file, kwarg):
@load_data_async
def run_load_task_async(self, request):
    """Placeholder for the asynchronous load task.

    The undecorated body always raises ``NotImplementedError``; import
    modules that support asynchronous loading are expected to override
    this method.
    NOTE(review): the effect of the ``load_data_async`` wrapper on how and
    when this body runs is not visible here -- confirm in
    ``arches.app.etl_modules.decorators``.
    """
    raise NotImplementedError

def prepare_temp_dir(self, request):
    """Record the request's load id and reset the temp directory tied to it.

    Sets ``self.loadid`` from the ``load_id`` POST field and ``self.temp_dir``
    to ``<settings.UPLOADED_FILES_DIR>/tmp/<load_id>``, then passes that path
    to ``self.delete_from_default_storage``. A ``FileNotFoundError`` from that
    call is ignored.

    Args:
        request: request object whose ``POST`` mapping carries ``load_id``.
    """
    load_id = request.POST.get("load_id")
    self.loadid = load_id
    self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", load_id)
    try:
        self.delete_from_default_storage(self.temp_dir)
    except FileNotFoundError:
        # Nothing stored under this path yet, so there is nothing to clear.
        pass

def validate_uploaded_file(self, file):
    """Validation hook for a single uploaded file; a no-op in this base class.

    Subclasses may override this to inspect ``file`` and raise on invalid
    input. This implementation accepts everything and returns ``None``.
    """
    pass

### Actions ###
Expand All @@ -207,14 +220,10 @@ def validate(self, loadid):
return {"success": success, "data": row}

def read(self, request):
self.loadid = request.POST.get("load_id")
self.prepare_temp_dir(request)
self.cumulative_excel_files_size = 0
content = request.FILES["file"]
self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", self.loadid)
try:
self.delete_from_default_storage(self.temp_dir)
except (FileNotFoundError):
pass

result = {"summary": {"name": content.name, "size": self.filesize_format(content.size), "files": {}}}
validator = FileValidator()
if len(validator.validate_file_type(content)) > 0:
Expand Down
76 changes: 76 additions & 0 deletions arches/app/etl_modules/jsonld_importer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
import os
import zipfile
from functools import lru_cache
from pathlib import Path

from django.core.files import File
from django.core.files.storage import default_storage
from django.utils.translation import gettext as _

from arches.app.etl_modules.base_import_module import BaseImportModule, FileValidationError
from arches.app.etl_modules.decorators import load_data_async
from arches.app.models.models import GraphModel
from arches.app.utils.file_validator import FileValidator


@lru_cache(maxsize=1)
def graph_id_from_slug(slug):
    """Return the primary key of the ``GraphModel`` whose slug equals *slug*.

    The result is memoized with ``lru_cache(maxsize=1)``, so only the most
    recently requested slug is kept in the cache. Any exception raised by
    ``GraphModel.objects.get`` propagates to the caller (and is not cached).
    """
    graph = GraphModel.objects.get(slug=slug)
    return graph.pk


class JSONLDImporter(BaseImportModule):
    """ETL import module that accepts a zip archive of JSON files.

    Only the upload step (``read``) and per-file validation are implemented;
    ``run_load_task`` and ``run_load_task_async`` are still placeholders.
    """

    def read(self, request):
        """Validate the uploaded archive and copy its JSON members to storage.

        The temp directory is prepared via ``prepare_temp_dir``. Every archive
        member whose name ends in the ``json`` extension (skipping directories
        and ``__MACOSX`` entries) is checked with ``validate_uploaded_file``
        and saved to default storage under ``self.temp_dir``.

        Args:
            request: request object; the archive is read from
                ``request.FILES["file"]``.

        Returns:
            dict: ``{"success": True, "data": result}`` when at least one JSON
            member was stored, where ``result["summary"]`` holds the upload
            name, its formatted size, a per-file size mapping and the
            cumulative JSON size. A failure dict is returned when the file
            type validator reports errors or when the archive contains no
            JSON members.

        Raises:
            FileValidationError: propagated from ``validate_uploaded_file``.
        """
        self.prepare_temp_dir(request)
        self.cumulative_json_files_size = 0
        content = request.FILES["file"]

        result = {"summary": {"name": content.name, "size": self.filesize_format(content.size), "files": {}}}
        validator = FileValidator()
        # validate_file_type returns a collection of errors; non-empty means invalid.
        if validator.validate_file_type(content):
            return {
                "status": 400,
                "success": False,
                "title": _("Invalid Uploaded File"),
                "message": _("Upload a valid zip file"),
            }

        with zipfile.ZipFile(content, "r") as zip_ref:
            for member in zip_ref.infolist():
                if member.filename.split(".")[-1] != "json":
                    continue
                if member.filename.startswith("__MACOSX"):
                    continue
                if member.is_dir():
                    continue
                self.cumulative_json_files_size += member.file_size
                result["summary"]["files"][member.filename] = {"size": (self.filesize_format(member.file_size))}
                with zip_ref.open(member) as opened_file:
                    self.validate_uploaded_file(opened_file)
                    default_storage.save(os.path.join(self.temp_dir, member.filename), File(opened_file))
            # The running total is only meaningful once every member has been seen.
            result["summary"]["cumulative_json_files_size"] = self.cumulative_json_files_size

        if not result["summary"]["files"]:
            title = _("Invalid Uploaded File")
            message = _("This file has missing information or invalid formatting. Make sure the file is complete and in the expected format.")
            return {"success": False, "data": {"title": title, "message": message}}

        return {"success": True, "data": result}

    def validate_uploaded_file(self, file):
        """Check that the archive member belongs to an existing graph.

        The second component of ``file.name`` is treated as a graph slug and
        looked up with ``graph_id_from_slug``.

        Args:
            file: opened archive member; only its ``name`` attribute is read.

        Raises:
            FileValidationError: with code 400 when the member's path has
                fewer than two components (no slug can be derived), or with
                code 404 when no graph matches the slug.
        """
        path = Path(file.name)
        if len(path.parts) < 2:
            # A member at the archive root has no second path component;
            # report it as a validation failure instead of an IndexError.
            raise FileValidationError(
                code=400,
                message=_('Unable to determine the model for "{0}" from its path in the archive.').format(file.name)
            )
        slug = path.parts[1]
        try:
            graph_id_from_slug(slug)
        # Django model classes expose DoesNotExist (not ObjectDoesNotExist).
        except GraphModel.DoesNotExist:
            raise FileValidationError(
                code=404,
                message=_('The model "{0}" does not exist.').format(slug)
            )

    def run_load_task(self, userid, files, summary, result, temp_dir, loadid):
        """Placeholder: the synchronous load task is not implemented yet (returns ``None``)."""
        ...

    @load_data_async
    def run_load_task_async(self, request):
        """Placeholder: the asynchronous load task is not implemented yet."""
        ...
1 change: 0 additions & 1 deletion arches/app/etl_modules/tile_excel_importer.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
from arches.app.models.system_settings import settings
from arches.app.utils.betterJSONSerializer import JSONSerializer
from arches.app.etl_modules.base_import_module import BaseImportModule, FileValidationError
from arches.app.etl_modules.base_import_module import BaseImportModule
import arches.app.tasks as tasks
from arches.management.commands.etl_template import create_tile_excel_workbook

Expand Down

0 comments on commit f10f02d

Please sign in to comment.