@@ -30,7 +30,6 @@ class LongTableParse:
     - saving_file(generated_JSON)
     - parsing_csv(): It manages this whole process:
         - calling first parse_a_list_of_dictionaries() and then saving_file()
-
     """

     def __init__(self, file_path=None, output_directory=None):
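The class above is driven entirely through `parsing_csv()`, which chains `parse_a_list_of_dictionaries()` and `saving_file()`. A minimal usage sketch, assuming the class is importable from this module (the import path and file paths below are illustrative, not taken from the PR):

```python
# Hypothetical driver for LongTableParse; paths and import path are placeholders.
from relecov_tools.read_bioinfo_metadata import LongTableParse

parser = LongTableParse(
    file_path="analysis/variants_long_table_20240115.csv",
    output_directory="analysis/output",
)
# parsing_csv() runs the whole pipeline: parse, then save the generated JSON.
parser.parsing_csv()
```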
@@ -136,17 +135,32 @@ def convert_to_json(self, samp_dict):
         j_list = []
         # Grab date from filename
         result_regex = re.search(
-            "variants_long_table_(.*).csv", os.path.basename(self.file_path)
+            r"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
         )
         if result_regex is None:
-            log.error("Analysis date not found in filename, aborting")
             stderr.print(
-                "[red]Error: filename must include analysis date in format YYYYMMDD"
+                "[red]\tWARN: Couldn't find variants long table file. Expected file name is:"
+            )
+            stderr.print(
+                "[red]\t\t- variants_long_table.csv or variants_long_table_YYYYMMDD.csv. Aborting..."
             )
-            stderr.print("[red]e.g. variants_long_table_20220830.csv")
             sys.exit(1)
+        else:
+            stderr.print(result_regex.group(0))
+            date_regex = re.search(r"(\d{8})", result_regex.group())
+            if date_regex is not None:
+                analysis_date = date_regex.group()
+                stderr.print(
+                    f"[green]\tDate {analysis_date} found in {self.file_path}"
+                )
+            else:
+                analysis_date = "Not Provided [GENEPIO:0001668]"
+                stderr.print(
+                    f"[yellow]\tWARN: No analysis date found in long table: {self.file_path}"
+                )
+
         for key, values in samp_dict.items():
-            j_dict = {"sample_name": key, "analysis_date": result_regex.group(1)}
+            j_dict = {"sample_name": key, "analysis_date": analysis_date}
             j_dict["variants"] = values
             j_list.append(j_dict)
         return j_list
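The new regex accepts both the dated and undated filename, and the date lookup falls back to the GENEPIO "Not Provided" term instead of aborting. A standalone sketch of the same logic (stdlib only; `find_analysis_date` is an illustrative helper, not part of the PR):

```python
import re

def find_analysis_date(filename: str) -> str:
    """Return the YYYYMMDD date embedded in a long table filename, if any."""
    match = re.search(r"variants_long_table(?:_\d{8})?\.csv", filename)
    if match is None:
        raise ValueError(f"Unexpected long table filename: {filename}")
    date_match = re.search(r"(\d{8})", match.group())
    # Fall back to the ontology term when no date suffix is present.
    return date_match.group() if date_match else "Not Provided [GENEPIO:0001668]"

print(find_analysis_date("variants_long_table_20220830.csv"))  # 20220830
print(find_analysis_date("variants_long_table.csv"))           # Not Provided [GENEPIO:0001668]
```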
@@ -182,12 +196,20 @@ def parsing_csv(self):
 
 # START util functions
 def handle_pangolin_data(files_list):
-    """File handler to parse pangolin data (csv) into JSON structured format."""
+    """File handler to parse pangolin data (csv) into JSON structured format.
+
+    Args:
+        files_list (list): A list with paths to pangolin files.
+
+    Returns:
+        pango_data_processed: A dictionary containing the processed pangolin data.
+    """
     method_name = f"{handle_pangolin_data.__name__}"
     method_log_report = BioinfoReportLog()
 
     # Handling pangolin data
     pango_data_processed = {}
+    valid_samples = []
     try:
         files_list_processed = relecov_tools.utils.select_most_recent_files_per_sample(
             files_list
@@ -208,23 +230,39 @@ def handle_pangolin_data(files_list):
                 key.split()[0]: value for key, value in pango_data.items()
             }
             pango_data_processed.update(pango_data_updated)
-            method_log_report.update_log_report(
-                method_name, "valid", f"Successfully handled data in {pango_file}."
-            )
+            valid_samples.append(pango_data_key.split()[0])
         except (FileNotFoundError, IndexError) as e:
             method_log_report.update_log_report(
-                method_name, "error", f"Error processing file {pango_file}: {e}"
+                method_name,
+                "warning",
+                f"Error occurred while processing file {pango_file}: {e}",
             )
-            sys.exit(method_log_report.print_log_report(method_name, ["error"]))
+            continue
     except Exception as e:
         method_log_report.update_log_report(
-            method_name, "error", f"Error occurred while processing files: {e}"
+            method_name, "warning", f"Error occurred while processing files: {e}"
+        )
+    if len(valid_samples) > 0:
+        method_log_report.update_log_report(
+            method_name,
+            "valid",
+            f"Successfully handled data in samples: {', '.join(valid_samples)}",
         )
-        sys.exit(method_log_report.print_log_report(method_name, ["error"]))
+    method_log_report.print_log_report(method_name, ["valid", "warning"])
     return pango_data_processed
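The behavioural change in this hunk: a single broken pangolin file no longer aborts the run with `sys.exit()`; it is logged as a warning, the loop continues, and one summary "valid" entry covers all successfully handled samples. A small self-contained sketch of that warn-and-continue pattern, with prints standing in for `BioinfoReportLog` and a hypothetical `load_one` standing in for the per-file CSV read:

```python
def load_one(path: str) -> dict:
    # Hypothetical per-file reader; fails for one input to show the pattern.
    if "broken" in path:
        raise FileNotFoundError(path)
    return {path: {"lineage": "B.1.1.7"}}

def handle_files(paths: list) -> dict:
    processed: dict = {}
    valid: list = []
    for path in paths:
        try:
            processed.update(load_one(path))
            valid.append(path)
        except (FileNotFoundError, IndexError) as e:
            # Warn and keep going instead of exiting the whole run.
            print(f"WARN: Error occurred while processing file {path}: {e}")
            continue
    if len(valid) > 0:
        print(f"Successfully handled data in samples: {', '.join(valid)}")
    return processed

handle_files(["sample1.csv", "broken.csv", "sample2.csv"])
```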
 
 
 def parse_long_table(files_list):
+    """File handler to retrieve data from long table files and convert it into a JSON structured format.
+    This function uses the LongTableParse class to parse the long table data.
+    Since this utility handles and maps data in a custom way, it returns None so that the result is not passed on to read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
+
+    Args:
+        files_list (list): A list of paths to long table files.
+
+    Returns:
+        None: Indicates that the function does not return any meaningful value.
+    """
     method_name = f"{parse_long_table.__name__}"
     method_log_report = BioinfoReportLog()
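The `None` return documented above is a contract with the caller: handlers that return a dictionary get mapped, while `parse_long_table` saves its own output and must be skipped. A hypothetical sketch of a dispatcher honouring that contract (names are illustrative; this is not the actual `BioinfoMetadata` code):

```python
def dispatch(handlers: dict, files_by_topic: dict) -> None:
    # Illustrative dispatcher: map dict results, skip handlers returning None.
    for topic, handler in handlers.items():
        result = handler(files_by_topic.get(topic, []))
        if result is None:
            # e.g. parse_long_table: JSON already written, nothing to map.
            continue
        mapping_over_table(result)  # stand-in for the real mapping step

def mapping_over_table(data: dict) -> None:
    for sample, fields in data.items():
        print(sample, fields)
```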
@@ -250,7 +288,14 @@ def parse_long_table(files_list):
 
 
 def handle_consensus_fasta(files_list):
-    """File handler to parse consensus fasta data (*.consensus.fa) into JSON structured format"""
+    """File handler to parse consensus data (fasta) into JSON structured format.
+
+    Args:
+        files_list (list): A list with paths to consensus files.
+
+    Returns:
+        consensus_data_processed: A dictionary containing the processed consensus data.
+    """
     method_name = f"{handle_consensus_fasta.__name__}"
     method_log_report = BioinfoReportLog()
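For flavour, a minimal sketch of what a consensus FASTA handler of this shape might do: key each record by the sample name taken from the filename and record basic sequence stats. The fields here (length, N count) are assumptions for illustration; the real `handle_consensus_fasta` may collect different ones.

```python
import os

def summarize_consensus(files_list: list) -> dict:
    """Illustrative consensus handler: stdlib only, fields are assumptions."""
    consensus_data_processed = {}
    for path in files_list:
        # Sample name assumed to be the filename stem before ".consensus.fa".
        sample = os.path.basename(path).split(".")[0]
        chunks = []
        with open(path) as handle:
            for line in handle:
                if not line.startswith(">"):
                    chunks.append(line.strip())
        sequence = "".join(chunks)
        consensus_data_processed[sample] = {
            "sequence_length": len(sequence),
            "n_bases": sequence.upper().count("N"),
        }
    return consensus_data_processed
```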