|
1 | 1 | import json
|
| 2 | +import logging |
| 3 | +import tempfile |
2 | 4 |
|
3 | 5 | from pystac_monty.sources.pdc import PDCDataSource, PDCTransformer
|
4 | 6 |
|
5 |
| -from apps.etl.models import ExtractionData |
| 7 | +from apps.etl.models import ExtractionData, Transform |
6 | 8 | from main.celery import app
|
7 | 9 |
|
8 | 10 | from .handler import BaseTransformerHandler
|
9 | 11 |
|
# Module-level logger; records are tagged with this module's dotted path.
logger = logging.getLogger(__name__)
10 | 14 |
|
class PDCTransformHandler(BaseTransformerHandler):
    """Transform handler for Pacific Disaster Center (PDC) extraction data.

    Wires the PDC-specific transformer and schema into the generic
    ``BaseTransformerHandler`` pipeline. A PDC transformation needs three
    inputs: the hazard payload (the extraction's parent row), the exposure
    detail payload (the extraction row itself) and a companion geojson
    extraction row.
    """

    transformer = PDCTransformer
    transformer_schema = PDCDataSource

    @staticmethod
    def _dump_to_tempfile(stored_file) -> str:
        """Copy a stored response file into a closed local temp file and return its path.

        The transformer expects plain filesystem paths, while ``resp_data``
        may live on remote storage. ``delete=False`` keeps the file on disk
        after close; closing it here (via the context manager) flushes the
        bytes so the path is safe to read immediately — the original code
        never closed the handle, so buffered data could be missing when the
        transformer opened the file.
        """
        with stored_file.open("rb") as src:
            with tempfile.NamedTemporaryFile(suffix=".json", delete=False) as tmp:
                tmp.write(src.read())
                return tmp.name

    @classmethod
    def get_schema_data(cls, extraction_obj: ExtractionData, geo_json_obj: ExtractionData):
        """Build the transformer schema payload for one extraction/geojson pair.

        ``extraction_obj`` holds the exposure detail response, its parent holds
        the hazard response, and ``geo_json_obj`` holds the geojson response.
        Returns an instance of ``cls.transformer_schema``.
        """
        source_url = extraction_obj.url

        data = {
            "hazards_file_path": cls._dump_to_tempfile(extraction_obj.parent.resp_data),
            "exposure_timestamp": extraction_obj.metadata["exposure_id"],
            "uuid": extraction_obj.metadata["uuid"],
            "exposure_detail_file_path": cls._dump_to_tempfile(extraction_obj.resp_data),
            # NOTE(review): assumes geo_json_obj exists and has resp_data —
            # callers pass an id resolved via .first(); confirm upstream.
            "geojson_file_path": cls._dump_to_tempfile(geo_json_obj.resp_data),
        }

        return cls.transformer_schema(source_url=source_url, data=json.dumps(data))

    @classmethod
    def handle_transformation(cls, extraction_id, geo_json_id):
        """Run the PDC transformation for one extraction/geojson pair.

        Creates a ``Transform`` row to track status, runs the transformer,
        queues the resulting STAC items, and marks the row SUCCESS or FAILED.
        Re-raises any transformer error after recording the failure.
        """
        logger.info("Transformation started")
        extraction_obj = ExtractionData.objects.filter(id=extraction_id).first()
        geo_json_obj = ExtractionData.objects.filter(id=geo_json_id).first()
        # Guard against a missing row as well as an empty payload; the
        # original only checked resp_data and raised AttributeError when
        # the extraction id did not exist.
        if extraction_obj is None or not extraction_obj.resp_data:
            logger.info("Transformation ended due to no data")
            return

        transform_obj = Transform.objects.create(
            extraction=extraction_obj,
            status=Transform.Status.PENDING,
        )

        try:
            schema = cls.get_schema_data(extraction_obj, geo_json_obj)
            transformer = cls.transformer(schema)
            transformed_items = transformer.make_items()

            transform_obj.status = Transform.Status.SUCCESS
            transform_obj.save(update_fields=["status"])

            cls.load_stac_item_to_queue(transformed_items, transform_obj.id)

            logger.info("Transformation ended")

        except Exception:
            # logger.exception records the traceback; `extra` ties the log
            # record to the extraction row for debugging.
            logger.exception("Transformation failed", extra={"extraction_id": extraction_obj.id})
            transform_obj.status = Transform.Status.FAILED
            transform_obj.save(update_fields=["status"])
            # FIXME: Check if this creates duplicate entry in Sentry. if yes, remove this.
            raise

    @staticmethod
    @app.task
    def task(extraction_id, geo_json_id):
        """Celery entry point; delegates to ``handle_transformation``."""
        return PDCTransformHandler().handle_transformation(extraction_id, geo_json_id)