|
| 1 | +import re |
| 2 | +from xml.etree import ElementTree |
| 3 | + |
| 4 | +from ..mapper import Record, Vernacular |
| 5 | + |
| 6 | + |
class PreservicaRecord(Record):
    """Record mapper that turns Preservica source metadata into UCLDC fields."""

    BASE_URL = "https://oakland.access.preservica.com"

    # "%7C" is the percent-encoded "|" character, so this prefix reads
    # "sdb:digitalFile|" once decoded; it precedes the entity id in URLs.
    FILE_PREPEND = "sdb:digitalFile%7C"

    def UCLDC_map(self):
        """Build the UCLDC mapping for this record.

        Most fields are copied straight out of ``self.source_metadata``
        (missing keys map to ``None``). The two link fields are derived from
        ``BASE_URL``, ``FILE_PREPEND`` and the record's ``entity_id``.

        Returns:
            dict: UCLDC field name -> value.
        """
        lookup = self.source_metadata.get
        entity_id = lookup("entity_id")

        # Both links address the same entity; only the path segment differs.
        landing_url = (
            f"{self.BASE_URL}/file/{self.FILE_PREPEND}{entity_id}/"
        )
        thumbnail_url = (
            f"{self.BASE_URL}/download/thumbnail/{self.FILE_PREPEND}{entity_id}"
        )

        return {
            "calisphere-id": entity_id,
            "contributor": lookup("contributor"),
            # The source "spatial" field feeds UCLDC "coverage".
            "coverage": lookup("spatial"),
            "creator": lookup("creator"),
            "date": lookup("date"),
            "description": lookup("description"),
            "format": lookup("format"),
            "identifier": lookup("identifier"),
            "isShownAt": landing_url,
            "isShownBy": thumbnail_url,
            "language": lookup("language"),
            "publisher": lookup("publisher"),
            "relation": lookup("relation"),
            "rights": lookup("rights"),
            "source": lookup("source"),
            "state_located_in": {"stateLocatedIn": "California"},
            "subject": lookup("subject"),
            "title": lookup("title"),
            "type": lookup("type"),
        }
| 40 | + |
| 41 | + |
class PreservicaVernacular(Vernacular):
    """Vernacular parser for Preservica XML responses."""

    record_cls = PreservicaRecord

    # TODO: consider putting this namespace mapping in a place that can be imported
    # into both the mapper and fetcher
    NAMESPACES: dict = {
        "pra": "http://preservica.com/EntityAPI/v6.0",
        "xip": "http://preservica.com/XIP/v6.0",
        "oai_dc": "http://www.openarchives.org/OAI/2.0/oai_dc/",
    }

    def parse(self, response_body):
        """
        Parse one Preservica XML response into a single source record.

        We expect only one record per file for preservica. Minor changes will
        need to be made if we begin importing more per page.

        The first ``xip:MetadataContainer`` in the document is used: its
        ``xip:Entity`` text becomes ``entity_id`` and each child of its
        ``oai_dc:dc`` element is collected, by tag name with the namespace
        removed, into a list of text values.

        Returns the result of ``self.get_records`` called with a one-item list
        holding that record dict.
        """
        ns = self.NAMESPACES
        root = ElementTree.fromstring(response_body)

        container = root.find(".//xip:MetadataContainer", ns)
        content = container.find("xip:Content", ns)
        dc_record = content.find("oai_dc:dc", ns)

        record = {"entity_id": container.find("xip:Entity", ns).text}

        for element in dc_record:
            # Tags arrive as "{namespace-uri}name"; keep only the bare name.
            key = re.sub(r"{\S+}", "", element.tag)
            # Repeated elements accumulate under the same key.
            record.setdefault(key, []).append(element.text)

        return self.get_records([record])
0 commit comments