+import logging
+from typing import Any, Callable
+
+import requests
+
+from apps.etl.extraction.sources.base.handler import BaseExtraction
+from apps.etl.extraction.sources.base.utils import manage_duplicate_file_content
+from apps.etl.models import ExtractionData
+from main.celery import app
+
+logger = logging.getLogger(__name__)
+
+
+class IBTrACSExtraction(BaseExtraction):
+    """
+    Handles data extraction for IBTrACS.
+    """
+
+    @classmethod
+    def store_extraction_data(  # type: ignore[reportIncompatibleMethodOverride]
+        cls,
+        validate_source_func: Callable[[Any], None] | None,
+        source: int,
+        response: requests.Response,
+        instance_id: int | None = None,
+    ):
+        """
+        Save extracted data into the database, using content hashing to detect duplicate files.
+        """
+        file_name = f"{source}.zip"
+        resp_data = response
+
+        # Record the response content type on the instance once the data has been fetched from the API.
+        extraction_instance = ExtractionData.objects.get(id=instance_id)
+        extraction_instance.resp_data_type = "application/csv"
+        extraction_instance.save(update_fields=["resp_data_type"])
+
+        # Validate that the response data is non-empty.
+        if resp_data:
+            # Manage duplicate file content.
+            manage_duplicate_file_content(
+                source=extraction_instance.source,
+                hash_content=None,
+                instance=extraction_instance,
+                response_data=resp_data.content,
+                file_name=file_name,
+            )
+            return resp_data.content
+
+    @classmethod
+    def handle_extraction(cls, url: str, params: dict | None, source: int):  # type: ignore[reportIncompatibleMethodOverride]
+        """
+        Process data extraction.
+        Returns:
+            The id of the ExtractionData instance for this run.
+        """
+        logger.info("Starting data extraction")
+        instance = cls._create_extraction_instance(url=url, source=source)
+        try:
+            cls._update_instance_status(instance, ExtractionData.Status.IN_PROGRESS)
+            response = requests.get(url=url, params=params)
+            response.raise_for_status()
+            instance.resp_code = response.status_code
+            instance.save(update_fields=["resp_code"])
+
+            if response.status_code == 200:
+                response_data = cls.store_extraction_data(
+                    instance_id=instance.id,
+                    source=ExtractionData.Source.IBTRACS,
+                    response=response,
+                    validate_source_func=None,
+                )
+                if response_data:
+                    cls._update_instance_status(instance, ExtractionData.Status.SUCCESS)
+                    logger.info("Data extracted successfully")
+                else:
+                    cls._update_instance_status(
+                        instance,
+                        ExtractionData.Status.SUCCESS,
+                        ExtractionData.ValidationStatus.NO_DATA,
+                        update_validation=True,
+                    )
+                    logger.warning("No hazard data found in response")
+            # FIXME: Handle else case
+            return instance.id
+
+        except requests.exceptions.RequestException:
+            cls._update_instance_status(instance, ExtractionData.Status.FAILED)
+            logger.error(
+                "Extraction failed",
+                exc_info=True,
+                extra={
+                    "source": instance.source,
+                },
+            )
+            raise
+
+    @staticmethod
+    @app.task
+    def task(url: str):  # type: ignore[reportIncompatibleMethodOverride]
+        return IBTrACSExtraction().handle_extraction(url=url, params=None, source=ExtractionData.Source.IBTRACS)
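
For context (not part of the diff): with this handler in place, an extraction run would be kicked off by enqueueing the Celery task. A minimal caller-side sketch, where the import path and the URL are illustrative assumptions; only IBTrACSExtraction.task itself comes from the diff above:

    # Hypothetical caller, e.g. a scheduler or a management command.
    from apps.etl.extraction.sources.ibtracs.extract import IBTrACSExtraction  # assumed module path

    IBTRACS_URL = "https://example.org/ibtracs/ibtracs.ALL.list.csv.zip"  # illustrative placeholder, not the real endpoint

    # .delay() enqueues the task on the Celery broker and returns an AsyncResult.
    result = IBTrACSExtraction.task.delay(IBTRACS_URL)
    print(result.id)  # Celery task id; with a result backend, result.get() yields the ExtractionData id

Stacking @staticmethod over @app.task registers the function with Celery while namespacing it on the class, so callers never need an IBTrACSExtraction instance to enqueue work.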