@@ -42,6 +42,7 @@
 CHUNK_SIZE = 16 * 1024  # 16kb
 DOWNLOAD_TIMEOUT = 30
 
+MAX_RETRIES = 1
 RETRYABLE_ERRORS = (
     errors.DeadlockDetected,
     errors.LockNotAvailable,
@@ -86,26 +87,50 @@ def xloader_data_into_datastore(input):
 
     job_id = get_current_job().id
     errored = False
+
+    # Set-up logging to the db
+    handler = StoringHandler(job_id, input)
+    level = logging.DEBUG
+    handler.setLevel(level)
+    logger = logging.getLogger(job_id)
+    handler.setFormatter(logging.Formatter('%(message)s'))
+    logger.addHandler(handler)
+    # also show logs on stderr
+    logger.addHandler(logging.StreamHandler())
+    logger.setLevel(logging.DEBUG)
+
+    db.init(config)
     try:
-        xloader_data_into_datastore_(input, job_dict)
+        # Store details of the job in the db
+        db.add_pending_job(job_id, **input)
+        xloader_data_into_datastore_(input, job_dict, logger)
         job_dict['status'] = 'complete'
         db.mark_job_as_completed(job_id, job_dict)
+    except sa.exc.IntegrityError as e:
+        db.mark_job_as_errored(job_id, str(e))
+        job_dict['status'] = 'error'
+        job_dict['error'] = str(e)
+        log.error('xloader error: job_id %s already exists', job_id)
+        errored = True
     except JobError as e:
         db.mark_job_as_errored(job_id, str(e))
         job_dict['status'] = 'error'
         job_dict['error'] = str(e)
-        log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
+        log.error('xloader error: %s, %s', e, traceback.format_exc())
         errored = True
     except Exception as e:
         if isinstance(e, RETRYABLE_ERRORS):
             tries = job_dict['metadata'].get('tries', 0)
-            if tries == 0:
+            if tries < MAX_RETRIES:
+                tries = tries + 1
                 log.info("Job %s failed due to temporary error [%s], retrying", job_id, e)
                 job_dict['status'] = 'pending'
-                job_dict['metadata']['tries'] = tries + 1
+                job_dict['metadata']['tries'] = tries
                 enqueue_job(
                     xloader_data_into_datastore,
                     [input],
+                    title="retry xloader_data_into_datastore: resource: {} attempt {}".format(
+                        job_dict['metadata']['resource_id'], tries),
                     rq_kwargs=dict(timeout=RETRIED_JOB_TIMEOUT)
                 )
                 return None
@@ -114,7 +139,7 @@ def xloader_data_into_datastore(input):
             job_id, traceback.format_tb(sys.exc_info()[2])[-1] + repr(e))
         job_dict['status'] = 'error'
         job_dict['error'] = str(e)
-        log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
+        log.error('xloader error: %s, %s', e, traceback.format_exc())
         errored = True
     finally:
         # job_dict is defined in xloader_hook's docstring
@@ -125,7 +150,7 @@ def xloader_data_into_datastore(input):
     return 'error' if errored else None
 
 
-def xloader_data_into_datastore_(input, job_dict):
+def xloader_data_into_datastore_(input, job_dict, logger):
     '''This function:
     * downloads the resource (metadata) from CKAN
    * downloads the data
@@ -134,26 +159,6 @@ def xloader_data_into_datastore_(input, job_dict):
 
     (datapusher called this function 'push_to_datastore')
     '''
-    job_id = get_current_job().id
-    db.init(config)
-
-    # Store details of the job in the db
-    try:
-        db.add_pending_job(job_id, **input)
-    except sa.exc.IntegrityError:
-        raise JobError('job_id {} already exists'.format(job_id))
-
-    # Set-up logging to the db
-    handler = StoringHandler(job_id, input)
-    level = logging.DEBUG
-    handler.setLevel(level)
-    logger = logging.getLogger(job_id)
-    handler.setFormatter(logging.Formatter('%(message)s'))
-    logger.addHandler(handler)
-    # also show logs on stderr
-    logger.addHandler(logging.StreamHandler())
-    logger.setLevel(logging.DEBUG)
-
     validate_input(input)
 
     data = input['metadata']
@@ -197,10 +202,11 @@ def direct_load():
         loader.calculate_record_count(
             resource_id=resource['id'], logger=logger)
         set_datastore_active(data, resource, logger)
-        job_dict['status'] = 'running_but_viewable'
-        callback_xloader_hook(result_url=input['result_url'],
-                              api_key=api_key,
-                              job_dict=job_dict)
+        if 'result_url' in input:
+            job_dict['status'] = 'running_but_viewable'
+            callback_xloader_hook(result_url=input['result_url'],
+                                  api_key=api_key,
+                                  job_dict=job_dict)
         logger.info('Data now available to users: %s', resource_ckan_url)
         loader.create_column_indexes(
             fields=fields,
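
The behavioural core of this change is the bounded retry: instead of retrying only when tries == 0, the job is re-enqueued while the attempt count stays below MAX_RETRIES, and the incremented count is written back into the job metadata. A minimal sketch of that flow, assuming the same job_dict shape used above (maybe_retry and enqueue are hypothetical names introduced here for illustration, not part of ckanext-xloader):

    MAX_RETRIES = 1

    def maybe_retry(job_dict, job_id, enqueue):
        # Sketch only: mirrors the retry branch added in this diff.
        tries = job_dict['metadata'].get('tries', 0)
        if tries < MAX_RETRIES:
            tries = tries + 1
            job_dict['status'] = 'pending'
            job_dict['metadata']['tries'] = tries  # persist the attempt count
            enqueue(job_id, tries)                 # re-enqueue; attempt number goes in the job title
            return True                            # the job will run again
        return False                               # out of retries; caller marks the job as errored

With MAX_RETRIES = 1, a job that hits a retryable error (errors.DeadlockDetected, errors.LockNotAvailable) gets exactly one extra attempt before it is marked as errored.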