Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update Test Data for new Schema & Modify JSON Filepaths in read-bioinfo-metadata #427

Merged
15 commits merged on Mar 12, 2025
Merged
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ Code contributions to the release:
- Remove wrapper from github actions (test_sftp_modules) [#421](https://github.com/BU-ISCIII/relecov-tools/pull/421)
- Add Validation for Dropdown Columns: Notify Users of Invalid Entries in build-schema module [#423](https://github.com/BU-ISCIII/relecov-tools/pull/423)
- Test SFTP Login by Updating Port Assignment in wrapper_manager [#426](https://github.com/BU-ISCIII/relecov-tools/pull/426)
- Update Test Data for new Schema & Modify JSON Filepaths in read-bioinfo-metadata [#427](https://github.com/BU-ISCIII/relecov-tools/pull/427)

#### Fixes

Expand Down
2 changes: 2 additions & 0 deletions relecov_tools/conf/bioinfo_config.json
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,7 @@
"multiple_samples": true,
"required": true,
"function": null,
"extract": true,
"content": {
"ns_per_100_kbp": "# Ns per 100kb consensus"
}
Expand All @@ -122,6 +123,7 @@
"required": true,
"multiple_samples": true,
"function": null,
"extract": true,
"content": {
"software_version": {
"bioinformatics_protocol_software_version": "nf-core/viralrecon",
Expand Down
13 changes: 10 additions & 3 deletions relecov_tools/conf/configuration.json
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
},
"metadata_lab_heading": [
"Organism",
"Public Health sample id (SIVIES)",
"Public Health sample id (SIVIRA)",
"Sample ID given by originating laboratory",
"Sample ID given by the submitting laboratory",
"Sample ID given in the microbiology lab",
Expand All @@ -41,7 +41,8 @@
"Environmental System",
"Collection Device",
"Host",
"Host Age",
"Host Age Years",
"Host Age Months",
"Host Gender",
"Sequencing Date",
"Nucleic acid extraction protocol",
Expand All @@ -66,7 +67,13 @@
"Diagnostic Pcr Ct Value-2",
"Authors",
"Sequence file R1 fastq",
"Sequence file R2 fastq"
"Sequence file R2 fastq",
"Vaccinated",
"Specific medication for treatment or prophylaxis",
"Hospitalization",
"Admission to intensive care unit",
"Death",
"Immunosuppression"
],
"alt_heading_equivalences": {
"Sample ID": "Sample ID given for sequencing",
Expand Down
5 changes: 5 additions & 0 deletions relecov_tools/dataprocess_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,12 @@ def clean_module_params(self, module, params):
return module_valid_params

def exec_download(self, download_params):
if "sftp_port" in download_params:
sftp_port = download_params.pop("sftp_port", None)
download_manager = DownloadManager(**download_params)
if sftp_port is not None:
download_manager.relecov_sftp.sftp_port = int(sftp_port)
print(f"SFTP port assigned: {download_manager.relecov_sftp.sftp_port}")
download_manager.execute_process()
finished_folders = download_manager.finished_folders
download_logs = self.wrapper_logsum.prepare_final_logs(
Expand Down
7 changes: 5 additions & 2 deletions relecov_tools/download_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,8 +344,11 @@ def read_metadata_file(self, meta_f_path, return_data=True):
error_text = "Header could not be found for excel file %s"
raise MetadataError(str(error_text % os.path.basename(meta_f_path)))
for cell in ws_metadata_lab[header_row]:
cell.value = cell.value.strip()
metadata_header = [x.value for x in ws_metadata_lab[header_row]]
if cell.value is not None:
cell.value = cell.value.strip()
metadata_header = [
x.value for x in ws_metadata_lab[header_row] if x.value is not None
]
meta_column_list = self.metadata_lab_heading
if meta_column_list != metadata_header[1:]:
diffs = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,24 @@
"210141": {
"fastq_r1_md5": "eab8b05ef27f4f5cba5cddf6ad627de2",
"fastq_r2_md5": "d82a37aa970df2b8bf8f547ca7c18ac8",
"r1_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/media/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/media/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"sequence_file_R1_fastq": "210141_R1.fastq.gz",
"sequence_file_R2_fastq": "210141_R2.fastq.gz"
},
"2018086": {
"fastq_r1_md5": "b5242d60471e5a5a97b35531dbbe8c30",
"fastq_r2_md5": "57525c5a1ec992098e652aa01b366d69",
"r1_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/media/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/media/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"sequence_file_R1_fastq": "2018086_R1.fastq.gz",
"sequence_file_R2_fastq": "2018086_R2.fastq.gz"
},
"2018102": {
"fastq_r1_md5": "b5242d60471e5a5a97b35531dbbe8c30",
"fastq_r2_md5": "57525c5a1ec992098e652aa01b366d69",
"r1_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/media/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/media/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/media/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"sequence_file_R1_fastq": "2018102_R1.fastq.gz",
"sequence_file_R2_fastq": "2018102_R2.fastq.gz"
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
"number_of_samples_in_run": "",
"organism": "Severe acute respiratory syndrome coronavirus 2",
"purpose_sampling": "Surveillance",
"r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"rna_extraction_protocol": "RT-PCR",
"runID": "MiSeaq_GN_195",
"run_alias": "2018086_R1.fastq.gz_2018086_R2.fastq.gz",
Expand Down Expand Up @@ -132,8 +132,8 @@
"number_of_samples_in_run": "",
"organism": "Severe acute respiratory syndrome coronavirus 2",
"purpose_sampling": "Surveillance",
"r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"rna_extraction_protocol": "RT-PCR",
"runID": "MiSeaq_GN_195",
"run_alias": "2018102_R1.fastq.gz_2018102_R2.fastq.gz",
Expand Down Expand Up @@ -212,8 +212,8 @@
"number_of_samples_in_run": "",
"organism": "Severe acute respiratory syndrome coronavirus 2",
"purpose_sampling": "Surveillance",
"r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"rna_extraction_protocol": "RT-PCR",
"runID": "MiSeaq_GN_195",
"run_alias": "2018109_R1.fastq.gz_2018109_R2.fastq.gz",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
"number_of_samples_in_run": "",
"organism": "Severe acute respiratory syndrome coronavirus 2",
"purpose_sampling": "Surveillance",
"r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"rna_extraction_protocol": "RT-PCR",
"runID": "MiSeaq_GN_195",
"run_alias": "2018086_R1.fastq.gz_2018086_R2.fastq.gz",
Expand Down Expand Up @@ -132,8 +132,8 @@
"number_of_samples_in_run": "",
"organism": "Severe acute respiratory syndrome coronavirus 2",
"purpose_sampling": "Surveillance",
"r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"rna_extraction_protocol": "RT-PCR",
"runID": "MiSeaq_GN_195",
"run_alias": "2018102_R1.fastq.gz_2018102_R2.fastq.gz",
Expand Down Expand Up @@ -212,8 +212,8 @@
"number_of_samples_in_run": "",
"organism": "Severe acute respiratory syndrome coronavirus 2",
"purpose_sampling": "Surveillance",
"r1_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r1_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"r2_fastq_filepath": "/data/bioinfoshare/UCCT_Relecov/COD-2100-MAD-CNM/muestras_analizadas_viralrecon_20220720",
"rna_extraction_protocol": "RT-PCR",
"runID": "MiSeaq_GN_195",
"run_alias": "2018109_R1.fastq.gz_2018109_R2.fastq.gz",
Expand Down
Loading
Loading