Skip to content

Commit 607e792

Browse files
authored
Merge pull request #378 from BU-ISCIII/develop
Release to 1.4.0
2 parents 2d1f7a0 + 367873e commit 607e792

13 files changed

+726
-134
lines changed

CHANGELOG.md

+14-1
Original file line numberDiff line numberDiff line change
@@ -4,18 +4,31 @@ All notable changes to this project will be documented in this file.
44

55
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

7-
## [1.X.0] - 202X-XX-XX : https://github.com/BU-ISCIII/relecov-tools/releases/tag/
7+
## [1.4.0] - 2025-01-27 : https://github.com/BU-ISCIII/relecov-tools/releases/tag/v1.4.0
88

99
### Credits
1010

1111
Code contributions to the release:
1212

13+
- [Sarai Varona](https://github.com/svarona)
14+
- [Alejandro Bernabeu](https://github.com/aberdur)
15+
- [Victor Lopez](https://github.com/victor5lm)
16+
1317
### Modules
1418

1519
#### Added enhancements
1620

21+
- Added an IonTorrent flow cell for validation [#363](https://github.com/BU-ISCIII/relecov-tools/pull/363)
22+
- Added solution to timeout in upload-to-ena module [#368](https://github.com/BU-ISCIII/relecov-tools/pull/368)
23+
- Added log functionality to build-schema module [#340](https://github.com/BU-ISCIII/relecov-tools/pull/340)
24+
- Updated the metadata_processing field in configuration.json and added the other_preparation_kit, quality_control_metrics and consensus_criteria fields in the json schema [#372](https://github.com/BU-ISCIII/relecov-tools/pull/372)
25+
- Added quality control functionality to read-bioinfo-metadata [#373](https://github.com/BU-ISCIII/relecov-tools/pull/373)
26+
- Added dropdown functionality to build-schema enums [#374](https://github.com/BU-ISCIII/relecov-tools/pull/374)
27+
1728
#### Fixes
1829

30+
- Fixed read-bioinfo-metadata module [#367](https://github.com/BU-ISCIII/relecov-tools/pull/367)
31+
1932
#### Changed
2033

2134
#### Removed

pyproject.toml

+62
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,62 @@
1+
[build-system]
2+
requires = ["setuptools>=61.0"]
3+
build-backend = "setuptools.build_meta"
4+
5+
[project]
6+
name = "relecov-tools"
7+
version = "1.4.0"
8+
description = "Tools for managing and processing relecov network data."
9+
readme = "README.md"
10+
requires-python = ">=3.7"
11+
authors = [
12+
{name = "Sara Monzon", email = "[email protected]"},
13+
{name = "Luis Chapado", email = "[email protected]"},
14+
{name = "Isabel Cuesta", email = "[email protected]"},
15+
{name = "Sarai Varona", email = "[email protected]"},
16+
{name = "Daniel Valle", email = "[email protected]"},
17+
{name = "Pablo Mata", email = "[email protected]"},
18+
{name = "Victor Lopez", email = "[email protected]"},
19+
{name = "Emi Arjona", email = "[email protected]"},
20+
{name = "Jaime Ozaez", email = "[email protected]"},
21+
{name = "Juan Ledesma", email = "[email protected]"},
22+
{name = "Sergio Olmos", email = "[email protected]"},
23+
{name = "Alejandro Bernabeu", email = "[email protected]"},
24+
{name = "Alba Talavera", email = "[email protected]"}
25+
]
26+
maintainers = [
27+
{name = "Sara Monzon", email = "[email protected]"},
28+
{name = "Luis Chapado", email = "[email protected]"},
29+
{name = "Isabel Cuesta", email = "[email protected]"},
30+
{name = "Sarai Varona", email = "[email protected]"},
31+
{name = "Daniel Valle", email = "[email protected]"},
32+
{name = "Pablo Mata", email = "[email protected]"},
33+
{name = "Victor Lopez", email = "[email protected]"},
34+
{name = "Emi Arjona", email = "[email protected]"},
35+
{name = "Jaime Ozaez", email = "[email protected]"},
36+
{name = "Juan Ledesma", email = "[email protected]"},
37+
{name = "Sergio Olmos", email = "[email protected]"},
38+
{name = "Alejandro Bernabeu", email = "[email protected]"},
39+
{name = "Alba Talavera", email = "[email protected]"}
40+
]
41+
keywords = [
42+
"relecov",
43+
"bioinformatics",
44+
"pipeline",
45+
"sequencing",
46+
"NGS",
47+
"next generation sequencing"
48+
]
49+
license = {text = "GNU GENERAL PUBLIC LICENSE v.3"}
50+
dynamic = ["dependencies"]
51+
52+
[project.urls]
53+
Homepage = "https://github.com/BU-ISCIII/relecov-tools"
54+
55+
[tool.setuptools.dynamic]
56+
dependencies = {file = ["requirements.txt"]}
57+
58+
[tool.setuptools.packages.find]
59+
exclude = ["docs"]
60+
61+
[project.scripts]
62+
relecov-tools = "relecov_tools.__main__:run_relecov_tools"

relecov_tools/__main__.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
stderr=True, force_terminal=relecov_tools.utils.rich_force_colors()
3636
)
3737

38-
__version__ = "1.3.0"
38+
__version__ = "1.4.0"
3939

4040

4141
def run_relecov_tools():

relecov_tools/assets/pipeline_utils/viralrecon.py

+79-16
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,6 @@
88
import os.path
99

1010
from pathlib import Path
11-
from datetime import datetime
1211

1312
import relecov_tools.utils
1413
from relecov_tools.config_json import ConfigJson
@@ -135,7 +134,7 @@ def convert_to_json(self, samp_dict):
135134
j_list = []
136135
# Grab date from filename
137136
result_regex = re.search(
138-
"variants_long_table(?:_\d{8})?\.csv", os.path.basename(self.file_path)
137+
"variants_long_table(?:_\d{14})?\.csv", os.path.basename(self.file_path)
139138
)
140139
if result_regex is None:
141140
stderr.print(
@@ -153,18 +152,53 @@ def convert_to_json(self, samp_dict):
153152
j_list.append(j_dict)
154153
return j_list
155154

156-
def save_to_file(self, j_list):
155+
def save_to_file(self, j_list, batch_date):
157156
"""Transform the parsed data into a json file"""
158-
date_now = datetime.now().strftime("%Y%m%d%H%M%S")
159-
file_name = "long_table_" + date_now + ".json"
157+
file_name = "long_table_" + batch_date + ".json"
160158
file_path = os.path.join(self.output_directory, file_name)
161-
162-
try:
163-
with open(file_path, "w") as fh:
164-
fh.write(json.dumps(j_list, indent=4))
165-
stderr.print("[green]\tParsed data successfully saved to file:", file_path)
166-
except Exception as e:
167-
stderr.print("[red]\tError saving parsed data to file:", str(e))
159+
if os.path.exists(file_path):
160+
stderr.print(
161+
f"[blue]Long table {file_path} file already exists. Merging new data if possible."
162+
)
163+
log.info(
164+
"Long table %s file already exists. Merging new data if possible."
165+
% file_path
166+
)
167+
original_table = relecov_tools.utils.read_json_file(file_path)
168+
samples_indict = {item["sample_name"]: item for item in original_table}
169+
for item in j_list:
170+
sample_name = item["sample_name"]
171+
if sample_name in samples_indict:
172+
if samples_indict[sample_name] != item:
173+
stderr.print(
174+
f"[red]Same sample {sample_name} has different data in both long tables."
175+
)
176+
log.error(
177+
"Sample %s has different data in %s and new long table. Can't merge."
178+
% (sample_name, file_path)
179+
)
180+
return None
181+
else:
182+
original_table.append(item)
183+
try:
184+
with open(file_path, "w") as fh:
185+
fh.write(json.dumps(original_table, indent=4))
186+
stderr.print(
187+
"[green]\tParsed data successfully saved to file:", file_path
188+
)
189+
except Exception as e:
190+
stderr.print("[red]\tError saving parsed data to file:", str(e))
191+
log.error("Error saving parsed data to file: %s", e)
192+
else:
193+
try:
194+
with open(file_path, "w") as fh:
195+
fh.write(json.dumps(j_list, indent=4))
196+
stderr.print(
197+
"[green]\tParsed data successfully saved to file:", file_path
198+
)
199+
except Exception as e:
200+
stderr.print("[red]\tError saving parsed data to file:", str(e))
201+
log.error("Error saving parsed data to file: %s", e)
168202

169203
def parsing_csv(self):
170204
"""
@@ -180,7 +214,7 @@ def parsing_csv(self):
180214

181215

182216
# START util functions
183-
def handle_pangolin_data(files_list, output_folder=None):
217+
def handle_pangolin_data(files_list, batch_date, output_folder=None):
184218
"""File handler to parse pangolin data (csv) into JSON structured format.
185219
186220
Args:
@@ -320,7 +354,7 @@ def get_pango_data_version(files_list):
320354
return pango_data_processed
321355

322356

323-
def parse_long_table(files_list, output_folder=None):
357+
def parse_long_table(files_list, batch_date, output_folder=None):
324358
"""File handler to retrieve data from long table files and convert it into a JSON structured format.
325359
This function utilizes the LongTableParse class to parse the long table data.
326360
Since this utility handles and maps data in a custom way, it returns None to avoid being transferred to method read_bioinfo_metadata.BioinfoMetadata.mapping_over_table().
@@ -349,7 +383,7 @@ def parse_long_table(files_list, output_folder=None):
349383
# Parsing long table data and saving it
350384
long_table_data = long_table.parsing_csv()
351385
# Saving long table data into a file
352-
long_table.save_to_file(long_table_data)
386+
long_table.save_to_file(long_table_data, batch_date)
353387
stderr.print("[green]\tProcess completed")
354388
elif len(files_list) > 1:
355389
method_log_report.update_log_report(
@@ -361,7 +395,7 @@ def parse_long_table(files_list, output_folder=None):
361395
return None
362396

363397

364-
def handle_consensus_fasta(files_list, output_folder=None):
398+
def handle_consensus_fasta(files_list, batch_date, output_folder=None):
365399
"""File handler to parse consensus data (fasta) into JSON structured format.
366400
367401
Args:
@@ -406,3 +440,32 @@ def handle_consensus_fasta(files_list, output_folder=None):
406440
)
407441
method_log_report.print_log_report(method_name, ["valid", "warning"])
408442
return consensus_data_processed
443+
444+
445+
def quality_control_evaluation(data):
446+
"""Evaluate the quality of the samples and add the field 'qc_test' to each 'data' entry."""
447+
conditions = {
448+
"per_sgene_ambiguous": lambda x: float(x) < 10,
449+
"per_sgene_coverage": lambda x: float(x) > 98,
450+
"per_ldmutations": lambda x: float(x) > 60,
451+
"number_of_sgene_frameshifts": lambda x: int(x) == 0,
452+
"number_of_unambiguous_bases": lambda x: int(x) > 24000,
453+
"number_of_Ns": lambda x: int(x) < 5000,
454+
"qc_filtered": lambda x: int(x) > 50000,
455+
"per_reads_host": lambda x: float(x) < 20,
456+
}
457+
for sample in data:
458+
try:
459+
qc_status = "pass"
460+
for param, condition in conditions.items():
461+
value = sample.get(param)
462+
if value is None or not condition(value):
463+
qc_status = "fail"
464+
break
465+
sample["qc_test"] = qc_status
466+
except ValueError as e:
467+
sample["qc_test"] = "fail"
468+
print(
469+
f"Error processing sample {sample.get('sequencing_sample_id', 'unknown')}: {e}"
470+
)
471+
return data

0 commit comments

Comments
 (0)