Skip to content

Commit 1f28585

Browse files
committed
add reviewer suggestions #258
1 parent bdbb1d3 commit 1f28585

File tree

4 files changed

+192
-47
lines changed

4 files changed

+192
-47
lines changed

relecov_tools/__main__.py

-1
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@
1717
import relecov_tools.map_schema
1818
import relecov_tools.feed_database
1919
import relecov_tools.read_bioinfo_metadata
20-
import relecov_tools.long_table_parse
2120
import relecov_tools.metadata_homogeneizer
2221
import relecov_tools.gisaid_upload
2322
import relecov_tools.upload_ena_protocol

relecov_tools/assets/pipeline_utils/viralrecon.py

+60-15
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@ class LongTableParse:
3030
- saving_file(generated_JSON)
3131
- parsing_csv() : It manages the whole process:
3232
- calling first to parse_a_list_of_dictionaries() and then calling to saving_file()
33-
3433
"""
3534

3635
def __init__(self, file_path=None, output_directory=None):
@@ -136,17 +135,32 @@ def convert_to_json(self, samp_dict):
136135
j_list = []
137136
# Grab date from filename
138137
result_regex = re.search(
139-
"variants_long_table_(.*).csv", os.path.basename(self.file_path)
138+
"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
140139
)
140+
stderr.print(result_regex.group(0))
141141
if result_regex is None:
142-
log.error("Analysis date not found in filename, aborting")
143142
stderr.print(
144-
"[red]Error: filename must include analysis date in format YYYYMMDD"
143+
"[red]\tWARN: Couldn't find variants long table file. Expected file name is:"
144+
)
145+
stderr.print(
146+
"[red]\t\t- variants_long_table.csv or variants_long_table_YYYYMMDD.csv. Aborting..."
145147
)
146-
stderr.print("[red]e.g. variants_long_table_20220830.csv")
147148
sys.exit(1)
149+
else:
150+
date_regex = re.search(r"(\d{8})", result_regex.group())
151+
if date_regex is not None:
152+
analysis_date = date_regex
153+
stderr.print(
154+
f"[green]\tDate {analysis_date.group()} found in {self.file_path}"
155+
)
156+
else:
157+
analysis_date = "Not Provided [GENEPIO:0001668]"
158+
stderr.print(
159+
f"[yellow]\tWARN:No analysis date found in long table: {self.file_path}"
160+
)
161+
148162
for key, values in samp_dict.items():
149-
j_dict = {"sample_name": key, "analysis_date": result_regex.group(1)}
163+
j_dict = {"sample_name": key, "analysis_date": analysis_date.group()}
150164
j_dict["variants"] = values
151165
j_list.append(j_dict)
152166
return j_list
@@ -182,12 +196,20 @@ def parsing_csv(self):
182196

183197
# START util functions
184198
def handle_pangolin_data(files_list):
185-
"""File handler to parse pangolin data (csv) into JSON structured format."""
199+
"""File handler to parse pangolin data (csv) into JSON structured format.
200+
201+
Args:
202+
files_list (list): A list with paths to pangolin files.
203+
204+
Returns:
205+
pango_data_processed: A dictionary containing the processed pangolin data.
206+
"""
186207
method_name = f"{handle_pangolin_data.__name__}"
187208
method_log_report = BioinfoReportLog()
188209

189210
# Handling pangolin data
190211
pango_data_processed = {}
212+
valid_samples = []
191213
try:
192214
files_list_processed = relecov_tools.utils.select_most_recent_files_per_sample(
193215
files_list
@@ -208,23 +230,39 @@ def handle_pangolin_data(files_list):
208230
key.split()[0]: value for key, value in pango_data.items()
209231
}
210232
pango_data_processed.update(pango_data_updated)
211-
method_log_report.update_log_report(
212-
method_name, "valid", f"Successfully handled data in {pango_file}."
213-
)
233+
valid_samples.append(pango_data_key.split()[0])
214234
except (FileNotFoundError, IndexError) as e:
215235
method_log_report.update_log_report(
216-
method_name, "error", f"Error processing file {pango_file}: {e}"
236+
method_name,
237+
"warning",
238+
f"Error occurred while processing file {pango_file}: {e}",
217239
)
218-
sys.exit(method_log_report.print_log_report(method_name, ["error"]))
240+
continue
219241
except Exception as e:
220242
method_log_report.update_log_report(
221-
method_name, "error", f"Error occurred while processing files: {e}"
243+
method_name, "warning", f"Error occurred while processing files: {e}"
244+
)
245+
if len(valid_samples) > 0:
246+
method_log_report.update_log_report(
247+
method_name,
248+
"valid",
249+
f"Successfully handled data in samples: {', '.join(valid_samples)}",
222250
)
223-
sys.exit(method_log_report.print_log_report(method_name, ["error"]))
251+
method_log_report.print_log_report(method_name, ["valid", "warning"])
224252
return pango_data_processed
225253

226254

227255
def parse_long_table(files_list):
256+
"""File handler to retrieve data from long table files and convert it into a JSON structured format.
257+
This function utilizes the LongTableParse class to parse the long table data.
258+
Since this utility handles and maps data in a custom way, it returns None to avoid being passed to the method read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
259+
260+
Args:
261+
files_list (list): A list of paths to long table files.
262+
263+
Returns:
264+
None: Indicates that the function does not return any meaningful value.
265+
"""
228266
method_name = f"{parse_long_table.__name__}"
229267
method_log_report = BioinfoReportLog()
230268

@@ -250,7 +288,14 @@ def parse_long_table(files_list):
250288

251289

252290
def handle_consensus_fasta(files_list):
253-
"""File handler to parse consensus fasta data (*.consensus.fa) into JSON structured format"""
291+
"""File handler to parse consensus data (fasta) into JSON structured format.
292+
293+
Args:
294+
files_list (list): A list with paths to consensus files.
295+
296+
Returns:
297+
consensus_data_processed: A dictionary containing the processed consensus data.
298+
"""
254299
method_name = f"{handle_consensus_fasta.__name__}"
255300
method_log_report = BioinfoReportLog()
256301

relecov_tools/conf/bioinfo_config.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
{
22
"viralrecon": {
33
"mapping_stats": {
4-
"fn": "mapping_illumina_stats.tab",
4+
"fn": "mapping_illumina_stats(?:_\\d{8})?\\.tab",
55
"sample_col_idx": 5,
66
"header_row_idx": 1,
77
"required": true,
@@ -37,7 +37,7 @@
3737
}
3838
},
3939
"variants_long_table": {
40-
"fn": "variants_long_table(?:_\\d{8})?\\.csv$",
40+
"fn": "variants_long_table(?:_\\d{8})?\\.csv",
4141
"sample_col_idx": 1,
4242
"header_row_idx": 1,
4343
"required": true,

0 commit comments

Comments
 (0)