Skip to content

Commit f10f02d

Browse files
Initial commit of JSON-LD import backend (etl module) archesproject#10798
1 parent 5fcd698 commit f10f02d

File tree

3 files changed

+92
-8
lines changed

3 files changed

+92
-8
lines changed

arches/app/etl_modules/base_import_module.py

+16-7
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from django.utils.decorators import method_decorator
1414
from django.db import connection
1515

16+
from arches.app.etl_modules.decorators import load_data_async
1617
from arches.app.etl_modules.save import save_to_tiles
1718
from arches.app.models.models import Node
1819
from arches.app.models.system_settings import settings
@@ -189,7 +190,19 @@ def run_load_task(self, userid, files, summary, result, temp_dir, loadid):
189190
result["summary"] = summary
190191
return {"success": result["validation"]["success"], "data": result}
191192

192-
def validate_uploaded_file(self, file, kwarg):
193+
@load_data_async
def run_load_task_async(self, request):
    """Placeholder for the asynchronous load entry point.

    The undecorated body does nothing but raise; how ``load_data_async``
    wraps the call is not visible from this block.

    Raises:
        NotImplementedError: always, in this base implementation.
    """
    raise NotImplementedError()
196+
197+
def prepare_temp_dir(self, request):
    """Record the load id from the request and reset its temp directory.

    Sets ``self.loadid`` from the ``load_id`` POST field and ``self.temp_dir``
    to ``<UPLOADED_FILES_DIR>/tmp/<load_id>``, then calls
    ``delete_from_default_storage`` on that path. A ``FileNotFoundError``
    from that call is ignored.
    """
    # NOTE(review): load_id comes straight from the request and ends up in a
    # path that is handed to a delete helper -- confirm it is validated
    # upstream (e.g. restricted to a UUID) so it cannot point elsewhere.
    load_id = request.POST.get("load_id")
    self.loadid = load_id
    self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", load_id)
    try:
        self.delete_from_default_storage(self.temp_dir)
    except FileNotFoundError:
        # Nothing was staged for this load id yet; there is nothing to clear.
        pass
204+
205+
def validate_uploaded_file(self, file):
    """Validation hook for a single uploaded file; a no-op in this base class."""
194207

195208
### Actions ###
@@ -207,14 +220,10 @@ def validate(self, loadid):
207220
return {"success": success, "data": row}
208221

209222
def read(self, request):
210-
self.loadid = request.POST.get("load_id")
223+
self.prepare_temp_dir(request)
211224
self.cumulative_excel_files_size = 0
212225
content = request.FILES["file"]
213-
self.temp_dir = os.path.join(settings.UPLOADED_FILES_DIR, "tmp", self.loadid)
214-
try:
215-
self.delete_from_default_storage(self.temp_dir)
216-
except (FileNotFoundError):
217-
pass
226+
218227
result = {"summary": {"name": content.name, "size": self.filesize_format(content.size), "files": {}}}
219228
validator = FileValidator()
220229
if len(validator.validate_file_type(content)) > 0:
+76
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
import os
2+
import zipfile
3+
from functools import lru_cache
4+
from pathlib import Path
5+
6+
from django.core.files import File
7+
from django.core.files.storage import default_storage
8+
from django.utils.translation import gettext as _
9+
10+
from arches.app.etl_modules.base_import_module import BaseImportModule, FileValidationError
11+
from arches.app.etl_modules.decorators import load_data_async
12+
from arches.app.models.models import GraphModel
13+
from arches.app.utils.file_validator import FileValidator
14+
15+
16+
@lru_cache(maxsize=1)
def graph_id_from_slug(slug):
    """Return the primary key of the graph whose slug equals *slug*.

    Only the most recently requested slug is cached (``maxsize=1``), so
    consecutive calls with the same slug do not repeat the query.

    Raises:
        GraphModel.DoesNotExist: if no graph has that slug.
    """
    matching_graph = GraphModel.objects.get(slug=slug)
    return matching_graph.pk
19+
20+
21+
class JSONLDImporter(BaseImportModule):
    """ETL import module that takes a zip archive of ``.json`` files.

    ``read`` stages the archive's JSON members in the load's temporary
    directory. The load tasks are still stubs in this version.
    """

    @staticmethod
    def _invalid_zip_response():
        """Build the response returned when the upload is not a usable zip."""
        return {
            "status": 400,
            "success": False,
            "title": _("Invalid Uploaded File"),
            "message": _("Upload a valid zip file"),
        }

    def read(self, request):
        """Validate an uploaded zip and copy its JSON members to storage.

        Args:
            request: request whose ``FILES["file"]`` holds the upload; it is
                also handed to ``prepare_temp_dir``.

        Returns:
            ``{"success": True, "data": result}`` where ``result["summary"]``
            carries the upload's name and size, the per-file sizes and the
            cumulative size of the JSON members. A dict with
            ``"success": False`` is returned when the upload is rejected or
            contains no JSON members.

        Raises:
            FileValidationError: propagated from ``validate_uploaded_file``.
        """
        self.prepare_temp_dir(request)
        self.cumulative_json_files_size = 0
        content = request.FILES["file"]

        result = {"summary": {"name": content.name, "size": self.filesize_format(content.size), "files": {}}}
        validator = FileValidator()
        if validator.validate_file_type(content):
            return self._invalid_zip_response()

        try:
            zip_ref = zipfile.ZipFile(content, "r")
        except zipfile.BadZipFile:
            # The upload passed the generic type check but is not a zip
            # archive; answer with the same error instead of crashing.
            return self._invalid_zip_response()

        with zip_ref:
            for file in zip_ref.infolist():
                if file.filename.split(".")[-1] != "json":
                    continue
                if file.filename.startswith("__MACOSX"):
                    continue
                if file.is_dir():
                    continue
                self.cumulative_json_files_size += file.file_size
                result["summary"]["files"][file.filename] = {"size": (self.filesize_format(file.file_size))}
                result["summary"]["cumulative_json_files_size"] = self.cumulative_json_files_size
                with zip_ref.open(file) as opened_file:
                    self.validate_uploaded_file(opened_file)
                    f = File(opened_file)
                    default_storage.save(os.path.join(self.temp_dir, file.filename), f)

        if not result["summary"]["files"]:
            title = _("Invalid Uploaded File")
            message = _("This file has missing information or invalid formatting. Make sure the file is complete and in the expected format.")
            return {"success": False, "data": {"title": title, "message": message}}

        return {"success": True, "data": result}

    def validate_uploaded_file(self, file):
        """Check that an archive member belongs to an existing graph.

        The second component of ``file.name`` is looked up as a graph slug.
        NOTE(review): this presumes members are laid out as
        ``<top dir>/<graph slug>/...`` -- confirm against the export format.

        Raises:
            FileValidationError: if the member's path has fewer than two
                components (code 400) or no graph has that slug (code 404).
        """
        path = Path(file.name)
        if len(path.parts) < 2:
            # A member at the archive root has no slug component to look up.
            raise FileValidationError(
                code=400,
                message=_('The file "{0}" is not inside a model directory.').format(file.name),
            )

        slug = path.parts[1]
        try:
            graph_id_from_slug(slug)
        except GraphModel.DoesNotExist as err:
            # Model classes expose DoesNotExist; ObjectDoesNotExist lives in
            # django.core.exceptions and is not an attribute of the model.
            raise FileValidationError(
                code=404,
                message=_('The model "{0}" does not exist.').format(slug),
            ) from err

    def run_load_task(self, userid, files, summary, result, temp_dir, loadid):
        """Load task; not implemented yet in this version."""
        ...

    @load_data_async
    def run_load_task_async(self, request):
        """Asynchronous load entry point; not implemented yet in this version."""
        ...

arches/app/etl_modules/tile_excel_importer.py

-1
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@
1616
from arches.app.models.system_settings import settings
1717
from arches.app.utils.betterJSONSerializer import JSONSerializer
1818
from arches.app.etl_modules.base_import_module import BaseImportModule, FileValidationError
19-
from arches.app.etl_modules.base_import_module import BaseImportModule
2019
import arches.app.tasks as tasks
2120
from arches.management.commands.etl_template import create_tile_excel_workbook
2221

0 commit comments

Comments
 (0)