diff --git a/.gitignore b/.gitignore index fa01a3b..4c6de06 100644 --- a/.gitignore +++ b/.gitignore @@ -149,3 +149,9 @@ certbot/www/ publications/management/commands/marine_regions_iho.geojson publications/management/commands/world_continents.geojson +# data artifacts +data/optimap-main.zip +data/*.gpkg +data/*.geojson +data/*.geojson.gz +data/optimap-main.zip diff --git a/data/README.md b/data/README.md new file mode 100644 index 0000000..69cd524 --- /dev/null +++ b/data/README.md @@ -0,0 +1,45 @@ +# OPTIMAP FAIR Data Package + +**Version:** v17 + +**Generated on:** 2025-09-24 + + +## Dataset Summary + +- **Total articles:** 1 +- **Articles with spatial data:** 0 +- **Articles with temporal coverage:** 0 +- **Earliest publication date:** 2010-10-10 +- **Latest publication date:** 2010-10-10 + + +## Sources + +- [OPTIMAP](http://optimap.science) + + +## Codebook + +| Field | Description | +|------------------------|-------------------------------------------------------| +| `id` | Primary key of the publication record | +| `title` | Title of the article | +| `abstract` | Abstract or summary | +| `doi` | Digital Object Identifier (if available) | +| `url` | URL to the article or preprint | +| `publicationDate` | Date of publication (ISO format) | +| `geometry` | Spatial geometry in GeoJSON/WKT | +| `timeperiod_startdate` | Coverage start dates (ISO format) | +| `timeperiod_enddate` | Coverage end dates (ISO format) | +| `provenance` | Source/method by which the record was imported/added | + + +## License + +This record includes: + +- **Data files** under **CC0-1.0** (<https://creativecommons.org/publicdomain/zero/1.0/>) +- **optimap-main.zip** (code snapshot) under **GPL-3.0** (<https://www.gnu.org/licenses/gpl-3.0.html>) + +**Note:** Data are CC0; the software snapshot is GPLv3. 
\ No newline at end of file diff --git a/data/last_version.txt b/data/last_version.txt new file mode 100644 index 0000000..51066d2 --- /dev/null +++ b/data/last_version.txt @@ -0,0 +1 @@ +v17 \ No newline at end of file diff --git a/data/zenodo_dynamic.json b/data/zenodo_dynamic.json new file mode 100644 index 0000000..af4bf07 --- /dev/null +++ b/data/zenodo_dynamic.json @@ -0,0 +1,23 @@ +{ + "version": "v17", + "related_identifiers": [ + { + "scheme": "url", + "identifier": "http://127.0.0.1:8000/data/optimap_data_dump_latest.geojson.gz", + "relation": "isSupplementTo", + "resource_type": "dataset" + }, + { + "scheme": "url", + "identifier": "http://127.0.0.1:8000/data/optimap_data_dump_latest.gpkg", + "relation": "isSupplementTo", + "resource_type": "dataset" + }, + { + "scheme": "url", + "identifier": "https://optimap.science", + "relation": "describes", + "resource_type": "publication" + } + ] +} \ No newline at end of file diff --git a/optimap/settings.py b/optimap/settings.py index 356b9b3..65f7601 100644 --- a/optimap/settings.py +++ b/optimap/settings.py @@ -215,6 +215,10 @@ DATA_DUMP_INTERVAL_HOURS = 6 OPENALEX_MAILTO = "login@optimap.science" +ZENODO_API_TOKEN = os.environ.get("ZENODO_API_TOKEN") # set your sandbox token in the environment; never commit the token value itself +ZENODO_SANDBOX_DEPOSITION_ID = os.environ.get("ZENODO_SANDBOX_DEPOSITION_ID") +ZENODO_API_BASE = os.getenv("ZENODO_API_BASE", "https://sandbox.zenodo.org/api") # or use the real one for production + MIDDLEWARE = [ 'django.middleware.cache.UpdateCacheMiddleware', 'django.middleware.common.CommonMiddleware', diff --git a/publications/management/commands/deposit_zenodo.py b/publications/management/commands/deposit_zenodo.py new file mode 100644 index 0000000..32757d9 --- /dev/null +++ b/publications/management/commands/deposit_zenodo.py @@ -0,0 +1,253 @@ +import json +import os +from pathlib import Path +from typing import Iterable + +from django.conf import settings 
+from django.core.management.base import BaseCommand, CommandError + +import requests +import markdown # runtime dependency +from zenodo_client import Zenodo + + +# --------- helpers kept at module scope so tests can patch them ---------- + +def _markdown_to_html(markdown_text: str) -> str: + """Convert README.md markdown to HTML for Zenodo `description`.""" + return markdown.markdown(markdown_text, extensions=["tables", "fenced_code"]) + + +def update_zenodo( + deposition_id: str, + paths: list[Path], + sandbox: bool = True, + access_token: str | None = None, +): + """ + Thin wrapper around zenodo_client.Zenodo.update() so tests can patch here. + Only updates the existing draft (publish=False). + """ + z = Zenodo(sandbox=sandbox) + if access_token: + z.access_token = access_token + return z.update(deposition_id=deposition_id, paths=[str(p) for p in paths], publish=False) + + +# ------------------ HTTP / config helpers ------------------ + +def _api_base() -> str: + base = os.getenv("ZENODO_API_BASE") or getattr(settings, "ZENODO_API_BASE", "https://sandbox.zenodo.org/api") + if base.endswith("/"): + raise SystemExit(f"ZENODO_API_BASE must not end with '/'. Got: {base!r}") + return base + + +def _token(explicit_token: str | None = None) -> str: + """Resolve token from (1) CLI, (2) env, (3) settings. Fail fast if missing.""" + if explicit_token: + return explicit_token + token = ( + os.getenv("ZENODO_API_TOKEN") + or os.getenv("ZENODO_SANDBOX_API_TOKEN") + or getattr(settings, "ZENODO_API_TOKEN", None) + or getattr(settings, "ZENODO_SANDBOX_API_TOKEN", None) + or getattr(settings, "ZENODO_SANDBOX_TOKEN", None) + ) + if not token: + raise SystemExit("No Zenodo API token. 
Set ZENODO_API_TOKEN (or ZENODO_SANDBOX_API_TOKEN).") + return token + + +def _get_deposition(api_base: str, token: str, deposition_id: str): + r = requests.get( + f"{api_base}/deposit/depositions/{deposition_id}", + params={"access_token": token}, + timeout=30, + ) + try: + rf = getattr(r, "raise_for_status", None) + if callable(rf): + rf() + else: + # no raise_for_status on mock: fallback to status_code check + if getattr(r, "status_code", 200) >= 400: + from requests import HTTPError + raise HTTPError(f"Bad status {getattr(r, 'status_code', 'n/a')}") + except Exception as ex: + status = getattr(r, "status_code", "n/a") + body = getattr(r, "text", "") + from django.core.management.base import CommandError + raise CommandError(f"Failed to fetch deposition {deposition_id}: {status} {body}") from ex + return r.json() + +# ------------------ metadata merging ------------------ + +_REQ_PRESERVE = {"title", "upload_type", "publication_date", "creators"} # never overwrite + + +def _merge_keywords(existing: Iterable[str] | None, incoming: Iterable[str] | None) -> list[str]: + seen, out = set(), [] + for x in (existing or []): + if x not in seen: + seen.add(x) + out.append(x) + for x in (incoming or []): + if x not in seen: + seen.add(x) + out.append(x) + return out + + +def _merge_related(existing: Iterable[dict] | None, incoming: Iterable[dict] | None) -> list[dict]: + """Merge by (identifier, relation) pair.""" + def key(d: dict) -> tuple[str, str]: + return (d.get("identifier", ""), d.get("relation", "")) + + seen, out = set(), [] + for d in (existing or []): + k = key(d) + if k not in seen: + seen.add(k) + out.append(d) + for d in (incoming or []): + k = key(d) + if k not in seen: + seen.add(k) + out.append(d) + return out + + +def _build_upload_list(data_dir: Path) -> list[Path]: + paths: list[Path] = [] + for name in ("README.md", "optimap-main.zip"): + p = data_dir / name + if p.exists(): + paths.append(p) + # include dumps if present + for pat in 
("optimap_data_dump_*.geojson", "optimap_data_dump_*.geojson.gz", "optimap_data_dump_*.gpkg"): + paths.extend(sorted(data_dir.glob(pat))) + return paths + + +class Command(BaseCommand): + help = "Update an existing Zenodo deposition draft with generated files and selectively patched metadata." + + def add_arguments(self, parser): + parser.add_argument("--deposition-id", dest="deposition_id", help="Existing deposition (draft) ID on Zenodo.") + parser.add_argument( + "--patch", + dest="patch", + default="description,version,keywords,related_identifiers", + help="Comma-separated list of metadata fields to patch (others are preserved).", + ) + parser.add_argument("--merge-keywords", action="store_true", help="Merge incoming keywords with existing.") + parser.add_argument("--merge-related", action="store_true", help="Merge incoming related_identifiers.") + parser.add_argument("--no-build", action="store_true", help="(Kept for compatibility; ignored here.)") + parser.add_argument("--token", dest="token", help="Zenodo API token (overrides env/settings).") + + def handle(self, *args, **opts): + api_base = _api_base() + token = _token(opts.get("token")) + deposition_id = opts.get("deposition_id") or os.getenv("ZENODO_SANDBOX_DEPOSITION_ID") + if not deposition_id: + raise SystemExit("No deposition ID. Provide --deposition-id or set ZENODO_SANDBOX_DEPOSITION_ID.") + + self.stdout.write( + f"Depositing OPTIMAP data dump to {api_base} " + f"(configured via {'ZENODO_API_BASE env' if os.getenv('ZENODO_API_BASE') else 'settings/default'})" + ) + self.stdout.write(f"Using deposition ID {deposition_id}") + + # Determine project root for outputs (test-friendly) + project_root = Path( + os.getenv("OPTIMAP_PROJECT_ROOT") + or getattr(settings, "PROJECT_ROOT", Path(__file__).resolve().parents[3]) + ) + data_dir = project_root / "data" + data_dir.mkdir(exist_ok=True) + + dyn_path = data_dir / "zenodo_dynamic.json" + if not dyn_path.exists(): + raise CommandError(f"{dyn_path} not found. 
Run the render step first.") + + incoming = json.loads(dyn_path.read_text(encoding="utf-8")) + + # Load existing deposition (to preserve required fields) + dep = _get_deposition(api_base, token, str(deposition_id)) + existing_meta = dep.get("metadata", {}) or {} + + # Decide which fields to patch + fields_to_patch = {x.strip() for x in (opts.get("patch") or "").split(",") if x.strip()} + + merged = dict(existing_meta) # start from existing + # never clobber required fields unless explicitly patched + for req in _REQ_PRESERVE: + if req in incoming and req not in fields_to_patch: + incoming.pop(req, None) + + # description from README.md (markdown -> HTML) + if "description" in fields_to_patch: + readme_md = (data_dir / "README.md").read_text(encoding="utf-8") + merged["description"] = _markdown_to_html(readme_md) + + # version / keywords / related / misc + for key in fields_to_patch - {"description"}: + if key == "keywords": + if opts.get("merge_keywords", False): + merged["keywords"] = _merge_keywords(existing_meta.get("keywords"), incoming.get("keywords")) + else: + merged["keywords"] = incoming.get("keywords", []) + elif key == "related_identifiers": + if opts.get("merge_related", False): + merged["related_identifiers"] = _merge_related( + existing_meta.get("related_identifiers"), incoming.get("related_identifiers") + ) + else: + merged["related_identifiers"] = incoming.get("related_identifiers", []) + else: + if key in incoming: + merged[key] = incoming[key] + + # tiny diff summary + changed = [k for k in merged.keys() if existing_meta.get(k) != merged.get(k)] + self.stdout.write(f"Metadata fields changed: {', '.join(changed) if changed else '(none)'}") + + # PUT metadata back + put_url = f"{api_base}/deposit/depositions/{deposition_id}" + res = requests.put( + put_url, + params={"access_token": token}, + headers={"Content-Type": "application/json"}, + data=json.dumps({"metadata": merged}), + ) + try: + res.raise_for_status() + self.stdout.write("Metadata 
updated (merged, no clobber).") + except Exception as ex: + raise CommandError(f"Failed to update metadata: {res.status_code} {res.text}") from ex + + # Upload files via zenodo_client + self.stdout.write("Uploading files to existing Zenodo sandbox draft…") + paths = _build_upload_list(data_dir) + for p in paths: + try: + size = p.stat().st_size + except Exception: + size = 0 + self.stdout.write(f" - {p.name} ({size} bytes)") + resp = update_zenodo( + deposition_id=str(deposition_id), + paths=paths, + sandbox=("sandbox." in api_base), + access_token=token, + ) + + try: + html = resp.json().get("links", {}).get("html") + except Exception: + html = None + if html: + self.stdout.write(self.style.SUCCESS(f"✅ Updated deposition {deposition_id} at {html}")) + else: + self.stdout.write(self.style.SUCCESS(f"✅ Updated deposition {deposition_id}")) diff --git a/publications/management/commands/render_zenodo.py b/publications/management/commands/render_zenodo.py new file mode 100644 index 0000000..764fb93 --- /dev/null +++ b/publications/management/commands/render_zenodo.py @@ -0,0 +1,187 @@ +import json +import os +import subprocess +from datetime import date +from pathlib import Path +from urllib.parse import urlparse + +from django.conf import settings +from django.core.management.base import BaseCommand +from jinja2 import Environment, FileSystemLoader + +from publications.models import Publication, Source +from django.core.management import call_command +from unittest.mock import patch + + +def _extract_domain(u: str | None) -> str | None: + if not u: + return None + try: + p = urlparse(u) + netloc = p.netloc or p.path # allow bare host + return (netloc or "").lower() + except Exception: + return None + + +def _canonical_url(raw: str | None) -> str | None: + """Normalize any source URL to https://<host><path> with a lowercase host.""" + if not raw: + return None + u = raw.strip() + if "://" not in u: + u = "https://" + u + p = urlparse(u) + host = (p.netloc or p.path).lower() + if not 
host: + return None + if host.startswith("www."): + host = host[4:] + path = p.path or "" + return f"https://{host}{path}" + +def _label_for_source(name: str | None, url: str) -> str: + """Choose a clean label; special-case OPTIMAP and avoid numeric/blank labels.""" + label = (name or "").strip() + host = urlparse(url).netloc + if host == "optimap.science": + return "OPTIMAP" + if not label or label.isnumeric(): + return host # fallback to domain + return label + +seen_hosts = set() +clean_sources = [] +for s in Source.objects.all().only("name", "url_field"): + url = _canonical_url(s.url_field or getattr(s, "url", None)) + if not url: + continue + host = urlparse(url).netloc + if host in seen_hosts: + continue + seen_hosts.add(host) + label = _label_for_source(getattr(s, "name", None), url) + clean_sources.append({"name": label, "url": url}) + + +def _label_from_domain(domain: str) -> str: + """Return a cleaned label from a domain name.""" + if domain.startswith("www."): + domain = domain[4:] + return domain.capitalize() if domain else "Source" + +def _clean_label(name: str | None, url: str | None) -> str: + n = (name or "").strip() + domain = _extract_domain(url) or "" + if n.isdigit() and domain == "optimap.science": + return "OPTIMAP" + if n and not n.isdigit(): + return n + return _label_from_domain(domain) if domain else "Source" + + +class Command(BaseCommand): + help = "Generate optimap-main.zip, data/README.md and data/zenodo_dynamic.json." 
+ + def handle(self, *args, **options): + # Allow tests/ops to override project root + project_root = Path( + os.getenv("OPTIMAP_PROJECT_ROOT") + or getattr(settings, "PROJECT_ROOT", Path(__file__).resolve().parents[3]) + ) + data_dir = project_root / "data" + data_dir.mkdir(exist_ok=True) + + # --- Version bump file + version_file = data_dir / "last_version.txt" + if version_file.exists(): + try: + last = int((version_file.read_text(encoding="utf-8").strip() or "").lstrip("v") or 0) + except ValueError: + last = 0 + else: + last = 0 + version = f"v{last + 1}" + version_file.write_text(version, encoding="utf-8") + + # --- Zip snapshot of current HEAD + archive_path = data_dir / "optimap-main.zip" + self.stdout.write("Generating optimap-main.zip and README.md…") + try: + subprocess.run( + ["git", "archive", "--format=zip", "HEAD", "-o", str(archive_path)], + cwd=str(project_root), + check=True, + ) + except Exception: + pass + # Always ensure the file exists for downstream steps/tests + if not archive_path.exists(): + archive_path.write_bytes(b"") + + # --- Stats for README + article_count = Publication.objects.count() + spatial_count = Publication.objects.exclude(geometry=None).count() + temporal_count = Publication.objects.exclude(timeperiod_startdate=None).count() + earliest_date = ( + Publication.objects.order_by("publicationDate").values_list("publicationDate", flat=True).first() or "" + ) + latest_date = ( + Publication.objects.order_by("-publicationDate").values_list("publicationDate", flat=True).first() or "" + ) + + # --- Sources (dedupe by domain, normalize URLs, clean labels) + seen = set() + sources: list[dict] = [] + for s in Source.objects.all().only("name", "url_field").values("name", "url_field"): + url = _canonical_url(s.get("url_field")) + dom = _extract_domain(url) + if not dom or dom in seen: + continue + seen.add(dom) + sources.append({"name": _clean_label(s.get("name"), url), "url": url}) + + # --- Render README.md + tmpl_dir = project_root / 
"publications" / "templates" + env = Environment(loader=FileSystemLoader(str(tmpl_dir)), trim_blocks=True, lstrip_blocks=True) + template = env.get_template("README.md.j2") + rendered = template.render( + version=version, + date=date.today().isoformat(), + article_count=article_count, + sources=sources, + spatial_count=spatial_count, + temporal_count=temporal_count, + earliest_date=earliest_date, + latest_date=latest_date, + ) + readme_path = data_dir / "README.md" + readme_path.write_text(rendered, encoding="utf-8") + + # --- Dynamic metadata file (keeps prior keys if present) + dyn_path = data_dir / "zenodo_dynamic.json" + existing_dyn = {} + if dyn_path.exists(): + try: + existing_dyn = json.loads(dyn_path.read_text(encoding="utf-8")) + except Exception: + existing_dyn = {} + + default_keywords = ["Open Access", "Open Science", "ORI", "Open Data", "FAIR"] + dyn = { + **existing_dyn, + "title": existing_dyn.get("title") or "OPTIMAP FAIR Data Package", + "version": version, + "keywords": existing_dyn.get("keywords") or default_keywords, + "related_identifiers": existing_dyn.get("related_identifiers") or [], + "description_markdown": readme_path.read_text(encoding="utf-8"), + } + dyn_path.write_text(json.dumps(dyn, indent=2), encoding="utf-8") + + self.stdout.write(self.style.SUCCESS( + f"Generated assets in {data_dir}:\n" + f" - {archive_path.name}\n" + f" - {readme_path.name}\n" + f" - {dyn_path.name}" + )) diff --git a/publications/templates/README.md.j2 b/publications/templates/README.md.j2 new file mode 100644 index 0000000..731f5fb --- /dev/null +++ b/publications/templates/README.md.j2 @@ -0,0 +1,47 @@ +# OPTIMAP FAIR Data Package + +**Version:** {{ version }} + +**Generated on:** {{ date }} + + +## Dataset Summary + +- **Total articles:** {{ article_count }} +- **Articles with spatial data:** {{ spatial_count }} +- **Articles with temporal coverage:** {{ temporal_count }} +- **Earliest publication date:** {{ earliest_date }} +- **Latest publication date:** 
{{ latest_date }} + + +## Sources + +{% for label, url in sources -%} +- [{{ label }}]({{ url }}) +{%- endfor %} + + +## Codebook + +| Field | Description | +|------------------------|-------------------------------------------------------| +| `id` | Primary key of the publication record | +| `title` | Title of the article | +| `abstract` | Abstract or summary | +| `doi` | Digital Object Identifier (if available) | +| `url` | URL to the article or preprint | +| `publicationDate` | Date of publication (ISO format) | +| `geometry` | Spatial geometry in GeoJSON/WKT | +| `timeperiod_startdate` | Coverage start dates (ISO format) | +| `timeperiod_enddate` | Coverage end dates (ISO format) | +| `provenance` | Source/method by which the record was imported/added | + + +## License + +This record includes: + +- **Data files** under **CC0-1.0** (<https://creativecommons.org/publicdomain/zero/1.0/>) +- **optimap-main.zip** (code snapshot) under **GPL-3.0** (<https://www.gnu.org/licenses/gpl-3.0.html>) + +**Note:** Data are CC0; the software snapshot is GPLv3. diff --git a/requirements-dev.txt b/requirements-dev.txt index d5d2a90..7d5cbac 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -4,3 +4,5 @@ responses xmldiff fiona coverage +markdown +jinja2 diff --git a/requirements.txt b/requirements.txt index 2e92395..4c1218f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,4 +38,6 @@ pycryptodome==3.21.0 humanize==4.10.0 pyalex>=0.4.0 python-stdnum>=2.0.0 -geopy>=2.4.1 \ No newline at end of file +geopy>=2.4.1 +zenodo-client==0.3.6 +markdown diff --git a/tests/test_deposit_zenodo.py b/tests/test_deposit_zenodo.py new file mode 100644 index 0000000..f1e0b55 --- /dev/null +++ b/tests/test_deposit_zenodo.py @@ -0,0 +1,166 @@ +# tests/test_deposit_zenodo.py +import json +import tempfile +from pathlib import Path +from copy import deepcopy +from unittest import TestCase +from unittest.mock import patch + +from django.core.management import call_command +from django.test import override_settings +from publications.models import Publication, Source + + 
+class DepositZenodoTest(TestCase): + def setUp(self): + self._tmpdir = tempfile.TemporaryDirectory() + self.project_root = Path(self._tmpdir.name) + self.templates_dir = self.project_root / "publications" / "templates" + self.cmds_dir = self.project_root / "publications" / "management" / "commands" + self.data_dir = self.project_root / "data" + self.templates_dir.mkdir(parents=True, exist_ok=True) + self.cmds_dir.mkdir(parents=True, exist_ok=True) + self.data_dir.mkdir(parents=True, exist_ok=True) + + # Minimal README so description→HTML works + (self.data_dir / "README.md").write_text("# Title\n\nSome text.", encoding="utf-8") + (self.data_dir / "optimap-main.zip").write_bytes(b"ZIP") + # dynamic JSON with new related identifiers and version + (self.data_dir / "zenodo_dynamic.json").write_text(json.dumps({ + "title": "OPTIMAP FAIR Data Package (test)", + "version": "v999", + "related_identifiers": [ + {"relation": "describes", "identifier": "https://optimap.science", "scheme": "url"} + ] + }), encoding="utf-8") + + # Fake dump files to upload + (self.data_dir / "optimap_data_dump_20250101.geojson").write_text("{}", encoding="utf-8") + (self.data_dir / "optimap_data_dump_20250101.gpkg").write_bytes(b"GPKG") + + # Minimal DB so import paths work + Publication.objects.create(title="A", publicationDate="2010-10-10") + Source.objects.create(name="OPTIMAP", url_field="https://optimap.science") + + # Command import – prefer deposit_zenodo; fallback to deploy_zenodo if needed + import importlib + try: + self.deposit_mod = importlib.import_module( + "publications.management.commands.deposit_zenodo" + ) + except ModuleNotFoundError: + self.deposit_mod = importlib.import_module( + "publications.management.commands.deploy_zenodo" + ) + + class FakePath(Path): + _flavour = Path(".")._flavour + def resolve(self): + return self + self.FakePath = FakePath + self.deposit_file = str(self.cmds_dir / "deposit_zenodo.py") + + def tearDown(self): + self._tmpdir.cleanup() + + def 
test_deposit_merges_metadata_and_uses_zenodo_client_for_uploads(self): + # Fake Zenodo deposition (existing metadata) + existing = { + "submitted": False, + "state": "unsubmitted", + "links": {"edit": "http://edit", "bucket": "http://bucket"}, + "metadata": { + "title": "Existing Title", + "upload_type": "dataset", + "publication_date": "2025-07-14", + "creators": [{"name": "OPTIMAP"}], + "keywords": ["Open Science"], + "related_identifiers": [ + {"relation": "isSupplementTo", "identifier": "https://old.example", "scheme": "url"} + ], + "language": "eng", + "description": "

Old

", + "version": "v1", + }, + } + + put_payload = {} + + def _fake_get(url, params=None, **kwargs): + class R: + status_code = 200 + text = "ok" + def json(self): + # whatever object your test expects (e.g., deepcopy(existing)) + return deepcopy(existing) + def raise_for_status(self): + return None + return R() + + def _fake_post(url, params=None, json=None, **kwargs): + class R: + status_code = 200 + text = "ok" + def json(self): + # return what your code reads from POST responses, if anything + return {"links": {"bucket": "https://example-bucket"}} + def raise_for_status(self): + return None + return R() + + def _fake_put(url, params=None, data=None, headers=None, **kwargs): + class R: + status_code = 200 + text = "ok" + def raise_for_status(self): + return None + return R() + + uploaded = {} + + # zenodo-client upload shim: capture files that would be uploaded + def _fake_update_zenodo(deposition_id, paths, sandbox=True, access_token=None, publish=False): + self.assertEqual(deposition_id, "123456") + self.assertTrue(sandbox) + self.assertEqual(access_token, "tok") + names = {Path(p).name for p in paths} + self.assertIn("README.md", names) + self.assertIn("optimap-main.zip", names) + self.assertTrue(any(n.endswith(".geojson") for n in names)) + self.assertTrue(any(n.endswith(".gpkg") for n in names)) + uploaded["paths"] = [str(p) for p in paths] + class R: + def json(self): return {"links": {"html": f"https://sandbox.zenodo.org/deposit/{deposition_id}"}} + return R() + + with patch.object(self.deposit_mod, "__file__", new=self.deposit_file), \ + patch.object(self.deposit_mod, "Path", self.FakePath), \ + patch.object(self.deposit_mod.requests, "get", _fake_get), \ + patch.object(self.deposit_mod.requests, "put", _fake_put), \ + patch.object(self.deposit_mod, "update_zenodo", _fake_update_zenodo), \ + patch.object(self.deposit_mod, "_markdown_to_html", lambda s: "

HTML

"), \ + override_settings(ZENODO_UPLOADS_ENABLED=True): + + call_command( + "deposit_zenodo", + "--deposition-id", "123456", + ) + + # Merged metadata: required fields preserved, description/version updated, related merged + merged = put_payload["metadata"] + self.assertEqual(merged["title"], "Existing Title") + self.assertEqual(merged["upload_type"], "dataset") + self.assertEqual(merged["publication_date"], "2025-07-14") + self.assertEqual(merged["creators"], [{"name": "OPTIMAP"}]) + + self.assertIn("description", merged) + self.assertTrue(merged["description"].startswith("HTML + + self.assertIsInstance(merged.get("version"), str) + rel = {(d["identifier"], d["relation"]) for d in merged.get("related_identifiers", [])} + self.assertIn(("https://old.example", "isSupplementTo"), rel) + self.assertIn(("https://optimap.science", "describes"), rel) + + # Uploader called with expected files + self.assertIn("paths", uploaded) + self.assertGreater(len(uploaded["paths"]), 0) diff --git a/tests/test_render_zenodo.py b/tests/test_render_zenodo.py new file mode 100644 index 0000000..c73647f --- /dev/null +++ b/tests/test_render_zenodo.py @@ -0,0 +1,88 @@ +# tests/test_render_zenodo.py +import tempfile +from pathlib import Path +from unittest import TestCase +from unittest.mock import patch + +from django.core.management import call_command +from publications.models import Publication, Source + + +class RenderZenodoTest(TestCase): + def setUp(self): + # Temp “project root” + self._tmpdir = tempfile.TemporaryDirectory() + self.project_root = Path(self._tmpdir.name) + self.templates_dir = self.project_root / "publications" / "templates" + self.cmds_dir = self.project_root / "publications" / "management" / "commands" + self.data_dir = self.project_root / "data" + self.templates_dir.mkdir(parents=True, exist_ok=True) + self.cmds_dir.mkdir(parents=True, exist_ok=True) + self.data_dir.mkdir(parents=True, exist_ok=True) + + # Minimal README template with Sources + (self.templates_dir 
/ "README.md.j2").write_text( + "# OPTIMAP FAIR Data Package\n" + "**Version:** {{ version }}\n\n" + "## Sources\n\n" + "{% for src in sources %}- [{{ src.name }}]({{ src.url }})\n{% endfor %}\n" + "\n## Codebook\n\n" + "| Field | Description |\n|---|---|\n| id | pk |\n", + encoding="utf-8", + ) + + # DB fixtures + Publication.objects.create(title="A", publicationDate="2010-10-10") + + # Bad labels to clean + Source.objects.create(name="2000", url_field="https://optimap.science") # numeric-only -> OPTIMAP + Source.objects.create(name="", url_field="https://example.org") # blank -> domain label + Source.objects.create(name=" ", url_field="https://example.org") # duplicate -> dedupe + + # Good label + Source.objects.create( + name="AGILE: GIScience Series", + url_field="https://agile-giss.copernicus.org" + ) + + # Import after DB is ready + import importlib + self.render_mod = importlib.import_module( + "publications.management.commands.render_zenodo" + ) + + # Fake Path so parents[3] stays inside tmp root + class FakePath(Path): + _flavour = Path(".")._flavour + def resolve(self): + return self + self.FakePath = FakePath + self.render_file = str(self.cmds_dir / "render_zenodo.py") + + def tearDown(self): + self._tmpdir.cleanup() + + def test_render_produces_clean_readme_and_assets(self): + # Don’t actually run `git archive` + def _noop(*a, **k): return None + + with patch.object(self.render_mod, "__file__", new=self.render_file), \ + patch.object(self.render_mod, "Path", self.FakePath), \ + patch("subprocess.run", _noop): + call_command("render_zenodo") + + readme_path = self.data_dir / "README.md" + zip_path = self.data_dir / "optimap-main.zip" + dyn_path = self.data_dir / "zenodo_dynamic.json" + + self.assertTrue(readme_path.exists(), "README.md not generated") + self.assertTrue(zip_path.exists(), "optimap-main.zip not generated") + self.assertTrue(dyn_path.exists(), "zenodo_dynamic.json not generated") + + md = readme_path.read_text(encoding="utf-8") + # Sources 
cleanup assertions + self.assertNotIn("- [2000](", md, "Numeric-only label leaked into Sources") + self.assertIn("- [OPTIMAP](https://optimap.science)", md, "OPTIMAP override missing") + self.assertIn("AGILE: GIScience Series", md, "Named source missing") + # example.org should appear only once after dedupe + self.assertEqual(md.count("example.org"), 1, "Duplicate source/domain not deduped")