Skip to content

Commit b5cd348

Browse files
committed
Implement PRA mapper
1 parent 30d6065 commit b5cd348

2 files changed

Lines changed: 75 additions & 1 deletion

File tree

metadata_mapper/mappers/mapper.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -283,7 +283,7 @@ def select_cmis_atom_id(self):
283283
return self
284284

285285
def select_preservica_id(self):
    """
    Set the legacy CouchDB id from the record's Preservica entity id.

    The id has the form "<collection_id>--<entity_id>". The entity id is
    read from `source_metadata["entity_id"]`; the earlier lookup of
    `mapped_data["preservica_id"]["$"]` has been superseded and is no
    longer consulted.

    Returns:
        self, so the call can be chained with other selector methods.
    """
    # NOTE(review): a record without "entity_id" yields "<collection_id>--None"
    # because .get() returns None; confirm whether that should be an error.
    entity_id = self.source_metadata.get("entity_id")
    self.legacy_couch_db_id = f"{self.collection_id}--{entity_id}"
    return self
289289

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
import re
2+
from xml.etree import ElementTree
3+
4+
from ..mapper import Record, Vernacular
5+
6+
7+
class PreservicaRecord(Record):
    """Record mapper that turns parsed Preservica metadata into UCLDC fields."""

    BASE_URL = "https://oakland.access.preservica.com"

    FILE_PREPEND = "sdb:digitalFile%7C"

    def UCLDC_map(self):
        """
        Build the UCLDC field dictionary for this record.

        Most fields are copied straight from `source_metadata` under the
        same name ("coverage" is the exception: it is read from "spatial").
        The two URL fields are assembled from BASE_URL, FILE_PREPEND and the
        record's entity id. Keys missing from `source_metadata` map to None.
        """
        lookup = self.source_metadata.get
        entity_id = lookup("entity_id")

        # Both URLs address the same "<FILE_PREPEND><entity_id>" reference.
        file_ref = f"{self.FILE_PREPEND}{entity_id}"
        shown_at = f"{self.BASE_URL}/file/{file_ref}/"
        shown_by = f"{self.BASE_URL}/download/thumbnail/{file_ref}"

        return {
            "calisphere-id": entity_id,
            "contributor": lookup("contributor"),
            "coverage": lookup("spatial"),
            "creator": lookup("creator"),
            "date": lookup("date"),
            "description": lookup("description"),
            "format": lookup("format"),
            "identifier": lookup("identifier"),
            "isShownAt": shown_at,
            "isShownBy": shown_by,
            "language": lookup("language"),
            "publisher": lookup("publisher"),
            "relation": lookup("relation"),
            "rights": lookup("rights"),
            "source": lookup("source"),
            # Constant for every record produced by this mapper.
            "state_located_in": {"stateLocatedIn": "California"},
            "subject": lookup("subject"),
            "title": lookup("title"),
            "type": lookup("type"),
        }
40+
41+
42+
class PreservicaVernacular(Vernacular):
    """Parses a fetched Preservica XML page into PreservicaRecord input."""

    record_cls = PreservicaRecord

    # TODO: consider putting this namespace mapping in a place that can be imported
    # into both the mapper and fetcher
    NAMESPACES: dict = {
        "pra": "http://preservica.com/EntityAPI/v6.0",
        "xip": "http://preservica.com/XIP/v6.0",
        "oai_dc": "http://www.openarchives.org/OAI/2.0/oai_dc/",
    }

    def parse(self, response_body):
        """
        Parse one Preservica XML response into a single record.

        Only one record per file is expected for Preservica; minor changes
        will be needed if more than one is ever imported per page.

        The record holds "entity_id" (text of the xip:Entity element) plus
        one list per child tag of the oai_dc:dc element, keyed by the tag
        name with its namespace removed. The result is whatever
        `self.get_records` returns for a one-element list with that record.
        """
        ns = self.NAMESPACES
        root = ElementTree.fromstring(response_body)
        container = root.find(".//xip:MetadataContainer", ns)

        # NOTE(review): a missing MetadataContainer, Content, dc or Entity
        # element surfaces as an AttributeError/TypeError on None below.
        content = container.find("xip:Content", ns)
        dublin_core = content.find("oai_dc:dc", ns)

        record = {"entity_id": container.find("xip:Entity", ns).text}

        for child in dublin_core:
            # Strip the "{namespace-uri}" prefix ElementTree puts on tags.
            field = re.sub(r"{\S+}", "", child.tag)
            # Repeated tags accumulate in order; empty elements contribute None.
            record.setdefault(field, []).append(child.text)

        return self.get_records([record])

0 commit comments

Comments
 (0)