1
- import json
2
1
import logging
3
- import uuid
4
- from datetime import datetime
2
+ import typing
5
3
6
4
import requests
7
- from celery import shared_task
8
5
from django .conf import settings
9
- from django .core .files .base import ContentFile
10
6
11
- from apps .etl .models import ExtractionData , HazardType
7
+ from apps .etl .extraction .sources .base .handler import BaseExtraction
8
+ from apps .etl .models import ExtractionData
9
+ from main .celery import app
12
10
13
11
logger = logging .getLogger (__name__ )
14
12
13
+ EMDATQueryVars = typing .TypedDict (
14
+ "EMDATQueryVars" ,
15
+ {
16
+ "limit" : int | None ,
17
+ "from" : int | None ,
18
+ "to" : int | None ,
19
+ "include_hist" : bool | None ,
20
+ },
21
+ )
15
22
16
- @shared_task
17
- def extract_emdat_latest_data ():
18
- to_year = datetime .now ().year
19
- from_year = int (settings .EMDAT_START_YEAR )
20
- # ref: https://files.emdat.be/docs/emdat_api_cookbook.pdfhttps://files.emdat.be/docs/emdat_api_cookbook.pdf
21
- variables = {"limit" : - 1 , "from" : from_year , "to" : to_year }
22
- return import_hazard_data (variables )
23
23
24
-
25
- @shared_task
26
- def extract_emdat_historical_data ():
27
- variables = {"limit" : - 1 , "include_hist" : True }
28
- return import_hazard_data (variables )
29
-
30
-
31
- @shared_task
32
- def import_hazard_data (variables , ** kwargs ):
24
+ class EMDATExtraction (BaseExtraction ):
33
25
"""
34
- Import hazard data from glide api
26
+ Handles data extraction from the EMDAT API.
35
27
"""
36
- logger .info ("Importing EMDAT data" )
37
- query = """
38
- query monty ($limit: Int, $offset: Int, $include_hist: Boolean, $from: Int, $to: Int) {
39
- api_version
40
- public_emdat(
41
- cursor: {
42
- offset: $offset,
43
- limit: $limit
44
- }
45
- filters: {
46
- include_hist: $include_hist
47
- from: $from
48
- to: $to
49
- }
50
- ) {
51
- total_available
52
- info {
53
- timestamp
54
- filters
55
- cursor
56
- version
57
- }
58
- data {
59
- disno
60
- classif_key
61
- group
62
- subgroup
63
- type
64
- subtype
65
- external_ids
66
- name
67
- iso
68
- country
69
- subregion
70
- region
71
- location
72
- origin
73
- associated_types
74
- ofda_response
75
- appeal
76
- declaration
77
- aid_contribution
78
- magnitude
79
- magnitude_scale
80
- latitude
81
- longitude
82
- river_basin
83
- start_year
84
- start_month
85
- start_day
86
- end_year
87
- end_month
88
- end_day
89
- total_deaths
90
- no_injured
91
- no_affected
92
- no_homeless
93
- total_affected
94
- reconstr_dam
95
- reconstr_dam_adj
96
- insur_dam
97
- insur_dam_adj
98
- total_dam
99
- total_dam_adj
100
- cpi
101
- admin_units
102
- entry_date
103
- last_update
104
- }
105
- }
106
- }
107
- """
108
-
109
- EMDAT_URL = f"{ settings .EMDAT_URL } "
110
- HEADERS = {"Authorization" : settings .EMDAT_AUTHORIZATION_KEY }
111
-
112
- # Create new extraction object for each extraction
113
- emdat_instance = ExtractionData .objects .create (
114
- source = ExtractionData .Source .EMDAT ,
115
- status = ExtractionData .Status .PENDING ,
116
- source_validation_status = ExtractionData .ValidationStatus .NO_VALIDATION ,
117
- hazard_type = HazardType .OTHER ,
118
- attempt_no = 0 ,
119
- trace_id = str (uuid .uuid4 ()),
120
- resp_code = 0 ,
121
- )
122
-
123
- try :
124
- # Set extraction status to progress
125
- emdat_instance .status = ExtractionData .Status .IN_PROGRESS
126
- emdat_instance .save (update_fields = ["status" ])
127
-
128
- paylod = {"query" : query , "variables" : variables }
129
- response = requests .post (EMDAT_URL , json = paylod , headers = HEADERS )
130
- response .raise_for_status ()
131
28
132
- # Save the extraction data
133
- if response and response .status_code == 200 :
134
- file_name = "emdat_disaster_data.json"
135
- emdat_instance .resp_data .save (file_name , ContentFile (response .content ))
136
-
137
- # Set extraction status to success
138
- emdat_instance .status = ExtractionData .Status .SUCCESS
139
- response_content_json = json .loads (response .content )
140
-
141
- # if data is empty set validation status to No Data
142
- if not response_content_json ["data" ]["public_emdat" ]:
143
- emdat_instance .source_validation_status = ExtractionData .ValidationStatus .NO_DATA
144
-
145
- emdat_instance .save (update_fields = ["status" , "source_validation_status" ])
146
-
147
- logger .info ("EMDAT data imported sucessfully" )
148
- return emdat_instance .id
149
-
150
- except requests .exceptions .RequestException :
151
- # Set extraction status to Fail
152
- emdat_instance .status = ExtractionData .Status .FAILED
153
- emdat_instance .save (update_fields = ["status" ])
154
- logger .error ("Extraction failed" , exc_info = True , extra = {"source" : ExtractionData .Source .EMDAT })
155
- # FIXME: Check if this creates duplicate entry in Sentry. if yes, remove this.
156
- raise
29
# FIXME: We need to handle GraphQL request in BaseExtraction
@classmethod
def handle_extraction(cls, query: str, variables: EMDATQueryVars, source: int) -> int:  # type: ignore[reportIncompatibleMethodOverride]
    """
    Run one EMDAT GraphQL extraction and record its outcome.

    Creates an extraction instance, marks it IN_PROGRESS, POSTs the query to
    ``settings.EMDAT_URL`` and hands the response to ``_save_response_data``.
    The instance ends as SUCCESS (with validation status NO_DATA when the
    ``public_emdat`` payload is empty) or FAILED on a request error.

    Args:
        query: GraphQL query document sent to the API.
        variables: GraphQL variables sent alongside the query.
        source: Source identifier passed to ``_create_extraction_instance``.

    Returns:
        int: ID of the extraction instance

    Raises:
        requests.exceptions.RequestException: Re-raised after the instance has
            been marked FAILED (includes timeouts and non-2xx responses).
    """
    logger.info("Starting data extraction")

    url = f"{settings.EMDAT_URL}"
    headers = {"Authorization": settings.EMDAT_AUTHORIZATION_KEY}
    # Without a timeout requests waits forever on a stalled connection, which
    # would leave the instance stuck in IN_PROGRESS. The (connect, read) pair
    # can be overridden with an optional EMDAT_REQUEST_TIMEOUT setting.
    timeout = getattr(settings, "EMDAT_REQUEST_TIMEOUT", (10, 300))

    instance = cls._create_extraction_instance(url=url, source=source)

    try:
        cls._update_instance_status(instance, ExtractionData.Status.IN_PROGRESS)

        payload = {"query": query, "variables": variables}
        response = requests.post(url, json=payload, headers=headers, timeout=timeout)
        response.raise_for_status()
        response_data = cls._save_response_data(instance, response)

        # NOTE(review): a GraphQL error body with "data": null would raise here
        # and bypass the FAILED handling below - confirm the API never does that.
        if not response_data or not response_data["data"]["public_emdat"]:
            cls._update_instance_status(
                instance,
                ExtractionData.Status.SUCCESS,
                ExtractionData.ValidationStatus.NO_DATA,
                update_validation=True,
            )
            logger.warning("No hazard data found in response")
        else:
            cls._update_instance_status(instance, ExtractionData.Status.SUCCESS)

        return instance.id

    except requests.exceptions.RequestException:
        # Timeouts, connection errors and HTTP error statuses all land here.
        cls._update_instance_status(instance, ExtractionData.Status.FAILED)
        logger.error(
            "extraction failed",
            exc_info=True,
            extra={
                "source": instance.source,
            },
        )
        raise
75
+
76
@staticmethod
@app.task
def task(query: str, variables: EMDATQueryVars):  # type: ignore[reportIncompatibleMethodOverride]
    """
    Task wrapper registered through ``app.task``.

    Runs ``handle_extraction`` with the given GraphQL query and variables,
    using ``ExtractionData.Source.EMDAT`` as the source, and returns its result.
    """
    extractor = EMDATExtraction()
    extraction_id = extractor.handle_extraction(
        query,
        variables,
        ExtractionData.Source.EMDAT,
    )
    return extraction_id
0 commit comments