Skip to content

Commit b92e86f

Browse files
authored
Merge pull request #11 from BU-ISCIII/develop
Changes in Erika repositorio
2 parents 45dc936 + 7a53f5d commit b92e86f

File tree

4 files changed

+586
-2
lines changed

4 files changed

+586
-2
lines changed

relecov_tools/ena_upload.py

+149
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,149 @@
1+
import os
2+
import logging
3+
import rich.console
4+
from email import utils
5+
import json as j
6+
import xml.etree.cElementTree as e
7+
8+
import relecov_tools.utils
9+
10+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Console used for diagnostics: created with stderr=True, a dim style and
# highlighting off. Whether terminal colours are forced is decided by the
# project helper relecov_tools.utils.rich_force_colors().
_force_terminal = relecov_tools.utils.rich_force_colors()
stderr = rich.console.Console(
    stderr=True, style="dim", highlight=False, force_terminal=_force_terminal
)
17+
18+
19+
class XmlCreation:
    """Create the XML documents used for a programmatic ENA submission.

    Two documents are produced so far:

    * ``<output_path>/study/project_relecov.xml``   (``xml_study``)
    * ``<output_path>/samples/samples_relecov.xml`` (``xml_samples``)

    Steps still to be implemented (kept from the original outline):
    1.1 upload study info, 2.2 upload samples info, 3. upload runs (FASTQ,
    experiments.xml / runs.xml / submission.xml over sftp) and 4. upload
    sequences (FASTA, json through webin-cli-rest).
    """

    # Submission template chosen for each supported action (upper-cased).
    _SUBMISSION_FILES = {
        "ADD": "submission_add.xml",
        "MODIFY": "submission_modify.xml",
    }

    def __init__(self, source_json=None, output_path=None, action=None, base_dir=None):
        """Store the inputs, prompting for any path that was not supplied.

        Args:
            source_json: path to the validated JSON file; prompted if None.
            output_path: directory receiving the XML files; prompted if None.
            action: "ADD" (default) or "MODIFY", case-insensitive.
            base_dir: directory holding the ``conf/``, ``schema/`` and
                ``xml_files/`` resources. Defaults to the directory of this
                module, so the result no longer depends on the current
                working directory.
        """
        # NOTE(review): the original called these prompts on the bare name
        # `utils`, which this file binds to the stdlib `email.utils` (it has
        # no such functions). The project helper module is assumed to be the
        # intended provider - confirm it defines both prompt functions.
        if source_json is None:
            self.source_json = relecov_tools.utils.prompt_source_path()
        else:
            self.source_json = source_json
        if output_path is None:
            self.output_path = relecov_tools.utils.prompt_destination_path()
        else:
            self.output_path = output_path
        self.action = "ADD" if action is None else action
        if base_dir is None:
            base_dir = os.path.dirname(os.path.abspath(__file__))
        self.base_dir = base_dir
        # Template selected by the last xml_study()/xml_samples() call.
        self.submission_file = None

    def _select_submission_file(self):
        """Return the submission template name for the action, or None."""
        # The original compared the bound method `self.action.upper` with
        # "ADD", which is never equal; the method has to be called.
        return self._SUBMISSION_FILES.get(self.action.upper())

    def _load_json(self, *parts):
        """Load a JSON resource located under ``self.base_dir``."""
        # j.loads("<file name>") parses the file name itself as JSON and
        # always fails; the file has to be opened and read.
        resource = os.path.join(self.base_dir, *parts)
        with open(resource, encoding="utf-8") as handle:
            return j.load(handle)

    def _ensure_output_dir(self, subdir):
        """Create ``<output_path>/<subdir>`` if needed and return its path."""
        try:
            os.makedirs(self.output_path)
            print("Directory ", self.output_path, " Created ")
        except FileExistsError:
            print("Directory ", self.output_path, " already exists")
        target = os.path.join(self.output_path, subdir)
        # ElementTree.write() does not create missing parent directories.
        os.makedirs(target, exist_ok=True)
        return target

    @staticmethod
    def _as_text(value):
        """Convert a JSON value to element text, keeping None as 'no text'."""
        return None if value is None else str(value)

    def xml_study(self):
        """Write ``project_relecov.xml`` and return the path of the file.

        The project alias, title and description are read from the
        ``project_relecov_xml`` section of ``conf/configuration.json``.

        Raises:
            OSError: if the source JSON or the configuration cannot be read.
            json.JSONDecodeError: if either file is not valid JSON.
            KeyError: if the configuration lacks a required entry.
        """
        # Imported locally: xml.etree.cElementTree, which the file-level
        # alias `e` points to, was removed in Python 3.9.
        import xml.etree.ElementTree as ET

        # Load the validated json. Its content is not used for the study XML
        # yet; reading it makes an unreadable input fail before any output.
        with open(self.source_json, encoding="utf-8") as json_format_file:
            j.load(json_format_file)

        study_dir = self._ensure_output_dir("study")

        # submission.xml template for the (not yet implemented) upload step.
        self.submission_file = self._select_submission_file()

        # project_relecov.xml
        project_conf = self._load_json("conf", "configuration.json")[
            "project_relecov_xml"
        ]
        root = ET.Element("PROJECT_SET")
        project = ET.SubElement(root, "PROJECT")
        project.set("alias", project_conf["alias"])
        ET.SubElement(project, "TITLE").text = project_conf["TITLE"]
        ET.SubElement(project, "DESCRIPTION").text = project_conf["DESCRIPTION"]
        submission = ET.SubElement(project, "SUBMISSION_PROJECT")
        ET.SubElement(submission, "SEQUENCING_PROJECT")

        project_xml = os.path.join(study_dir, "project_relecov.xml")
        ET.ElementTree(root).write(project_xml)
        # TODO: 1.1 Upload study info
        return project_xml

    def xml_samples(self):
        """Write ``samples_relecov.xml`` and return the path of the file.

        Sample data is read from ``schema/to_ena.json`` and the fixed taxon
        fields from the ``fixed_data`` section of ``conf/configuration.json``.
        Every key of the sample data becomes one SAMPLE_ATTRIBUTE.

        Raises:
            OSError: if a resource file cannot be read.
            json.JSONDecodeError: if a resource file is not valid JSON.
            KeyError: if ``sample_name`` or a configuration entry is missing.
        """
        import xml.etree.ElementTree as ET

        samples_dir = self._ensure_output_dir("samples")

        # submission.xml template for the (not yet implemented) upload step.
        self.submission_file = self._select_submission_file()

        # NOTE(review): the original reads the sample data from
        # schema/to_ena.json rather than from self.source_json; that choice
        # is kept here - confirm which file is intended.
        json_data = self._load_json("schema", "to_ena.json")
        fixed_data = self._load_json("conf", "configuration.json")["fixed_data"]
        sample_label = str(json_data["sample_name"])

        root = ET.Element("SAMPLE_SET")
        sample = ET.SubElement(root, "SAMPLE")
        sample.set("alias", "Programmatic Test SARS-CoV-2 Sample" + sample_label)
        ET.SubElement(sample, "TITLE").text = "SARS-CoV-2 Sample" + sample_label
        sample_name = ET.SubElement(sample, "SAMPLE_NAME")
        # Element text must be a string for serialisation; JSON may hold ints.
        ET.SubElement(sample_name, "TAXON_ID").text = self._as_text(
            fixed_data["tax_id"]
        )
        ET.SubElement(sample_name, "SCIENTIFIC_NAME").text = self._as_text(
            fixed_data["scientific_name"]
        )
        ET.SubElement(sample, "DESCRIPTION").text = "SARS-CoV-2 Sample" + sample_label
        sample_attributes = ET.SubElement(sample, "SAMPLE_ATTRIBUTES")
        for tag, value in json_data.items():
            sample_attribute = ET.SubElement(sample_attributes, "SAMPLE_ATTRIBUTE")
            ET.SubElement(sample_attribute, "TAG").text = str(tag)
            ET.SubElement(sample_attribute, "VALUE").text = self._as_text(value)

        samples_xml = os.path.join(samples_dir, "samples_relecov.xml")
        ET.ElementTree(root).write(samples_xml)
        # TODO: 2.2 Upload samples info
        # TODO: 3. From sftp upload runs (FASTQ files programmatic) -
        #          experiments.xml, runs.xml and submission.xml
        # TODO: 4. From sftp upload sequences (FASTA files programmatic) -
        #          json using webin-cli-rest
        return samples_xml
140+
141+
142+
# Adaptation to ena_upload
def load_ena_upload_tables(json_path=None):
    """Load the ena_upload JSON and return one DataFrame per section.

    The original fragment used ``pd`` without importing pandas and opened
    '../example_data/ena_upload.json' relative to the current working
    directory. It is wrapped in a function so that nothing is read at import
    time and the path can be supplied by the caller.

    Args:
        json_path: path to the ena_upload JSON file. Defaults to
            ``example_data/ena_upload.json`` next to this module.

    Returns:
        A tuple ``(df_study, df_samples, df_runs, df_experiments)`` built
        from the "study", "samples", "runs" and "experiments" entries.

    Raises:
        OSError: if the file cannot be read.
        KeyError: if one of the four sections is missing.
    """
    # Local import: pandas is not imported at the top of this file.
    import pandas as pd

    if json_path is None:
        json_path = os.path.join(
            os.path.dirname(os.path.abspath(__file__)),
            "example_data",
            "ena_upload.json",
        )
    with open(json_path, "r", encoding="utf-8") as f:
        data = j.load(f)

    df_study = pd.DataFrame.from_dict(data["study"])
    df_samples = pd.DataFrame.from_dict(data["samples"])
    df_runs = pd.DataFrame.from_dict(data["runs"])
    df_experiments = pd.DataFrame.from_dict(data["experiments"])
    return df_study, df_samples, df_runs, df_experiments

relecov_tools/ena_upload_buisciii.py

+174
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,174 @@
1+
import os
2+
import logging
3+
import rich.console
4+
from email import utils
5+
import json as j
6+
import xml.etree.cElementTree as e
7+
8+
import relecov_tools.utils
9+
10+
# Module-level logger, named after this module.
log = logging.getLogger(__name__)

# Console used for diagnostics: created with stderr=True, a dim style and
# highlighting off. Whether terminal colours are forced is decided by the
# project helper relecov_tools.utils.rich_force_colors().
_force_terminal = relecov_tools.utils.rich_force_colors()
stderr = rich.console.Console(
    stderr=True, style="dim", highlight=False, force_terminal=_force_terminal
)
17+
18+
19+
class XmlCreation:
    """Create the XML documents used for a programmatic ENA submission.

    Two documents are produced so far:

    * ``<output_path>/study/project_relecov.xml``   (``xml_study``)
    * ``<output_path>/samples/samples_relecov.xml`` (``xml_samples``)

    Steps still to be implemented (kept from the original outline):
    1.1 upload study info, 2.2 upload samples info, 3. upload runs (FASTQ,
    experiments.xml / runs.xml / submission.xml over sftp) and 4. upload
    sequences (FASTA, json through webin-cli-rest).
    """

    # Submission template chosen for each supported action (upper-cased).
    _SUBMISSION_FILES = {
        "ADD": "submission_add.xml",
        "MODIFY": "submission_modify.xml",
    }

    def __init__(self, source_json=None, output_path=None, action=None, base_dir=None):
        """Store the inputs, prompting for any path that was not supplied.

        Args:
            source_json: path to the validated JSON file; prompted if None.
            output_path: directory receiving the XML files; prompted if None.
            action: "ADD" (default) or "MODIFY", case-insensitive.
            base_dir: directory holding the ``conf/``, ``schema/`` and
                ``xml_files/`` resources. Defaults to the directory of this
                module, so the result no longer depends on the current
                working directory.
        """
        # NOTE(review): the original called these prompts on the bare name
        # `utils`, which this file binds to the stdlib `email.utils` (it has
        # no such functions). The project helper module is assumed to be the
        # intended provider - confirm it defines both prompt functions.
        if source_json is None:
            self.source_json = relecov_tools.utils.prompt_source_path()
        else:
            self.source_json = source_json
        if output_path is None:
            self.output_path = relecov_tools.utils.prompt_destination_path()
        else:
            self.output_path = output_path
        self.action = "ADD" if action is None else action
        if base_dir is None:
            base_dir = os.path.dirname(os.path.abspath(__file__))
        self.base_dir = base_dir
        # Template selected by the last xml_study()/xml_samples() call.
        self.submission_file = None

    def _select_submission_file(self):
        """Return the submission template name for the action, or None."""
        # The original compared the bound method `self.action.upper` with
        # "ADD", which is never equal; the method has to be called.
        return self._SUBMISSION_FILES.get(self.action.upper())

    def _load_json(self, *parts):
        """Load a JSON resource located under ``self.base_dir``."""
        # j.loads("<file name>") parses the file name itself as JSON and
        # always fails; the file has to be opened and read.
        resource = os.path.join(self.base_dir, *parts)
        with open(resource, encoding="utf-8") as handle:
            return j.load(handle)

    def _ensure_output_dir(self, subdir):
        """Create ``<output_path>/<subdir>`` if needed and return its path."""
        try:
            os.makedirs(self.output_path)
            print("Directory ", self.output_path, " Created ")
        except FileExistsError:
            print("Directory ", self.output_path, " already exists")
        target = os.path.join(self.output_path, subdir)
        # ElementTree.write() does not create missing parent directories.
        os.makedirs(target, exist_ok=True)
        return target

    @staticmethod
    def _as_text(value):
        """Convert a JSON value to element text, keeping None as 'no text'."""
        return None if value is None else str(value)

    def xml_study(self):
        """Write ``project_relecov.xml`` and return the path of the file.

        The project alias, title and description are read from the
        ``project_relecov_xml`` section of ``conf/configuration.json``.

        Raises:
            OSError: if the source JSON or the configuration cannot be read.
            json.JSONDecodeError: if either file is not valid JSON.
            KeyError: if the configuration lacks a required entry.
        """
        # Imported locally: xml.etree.cElementTree, which the file-level
        # alias `e` points to, was removed in Python 3.9.
        import xml.etree.ElementTree as ET

        # Load the validated json. Its content is not used for the study XML
        # yet; reading it makes an unreadable input fail before any output.
        with open(self.source_json, encoding="utf-8") as json_format_file:
            j.load(json_format_file)

        study_dir = self._ensure_output_dir("study")

        # submission.xml template for the (not yet implemented) upload step.
        self.submission_file = self._select_submission_file()

        # project_relecov.xml
        project_conf = self._load_json("conf", "configuration.json")[
            "project_relecov_xml"
        ]
        root = ET.Element("PROJECT_SET")
        project = ET.SubElement(root, "PROJECT")
        project.set("alias", project_conf["alias"])
        ET.SubElement(project, "TITLE").text = project_conf["TITLE"]
        ET.SubElement(project, "DESCRIPTION").text = project_conf["DESCRIPTION"]
        submission = ET.SubElement(project, "SUBMISSION_PROJECT")
        ET.SubElement(submission, "SEQUENCING_PROJECT")

        project_xml = os.path.join(study_dir, "project_relecov.xml")
        ET.ElementTree(root).write(project_xml)

        # TODO: 1.1 Upload study info. The earlier draft (kept as an inert
        # string block) sketched a `requests.post` of a JSON body with
        # Content-Type and Authorization headers against a placeholder echo
        # endpoint; it was never executed and held only dummy credentials.
        return project_xml

    def xml_samples(self):
        """Write ``samples_relecov.xml`` and return the path of the file.

        Sample data is read from ``schema/to_ena.json`` and the fixed taxon
        fields from the ``fixed_data`` section of ``conf/configuration.json``.
        Every key of the sample data becomes one SAMPLE_ATTRIBUTE.

        Raises:
            OSError: if a resource file cannot be read.
            json.JSONDecodeError: if a resource file is not valid JSON.
            KeyError: if ``sample_name`` or a configuration entry is missing.
        """
        import xml.etree.ElementTree as ET

        samples_dir = self._ensure_output_dir("samples")

        # submission.xml template for the (not yet implemented) upload step.
        self.submission_file = self._select_submission_file()

        # NOTE(review): the original reads the sample data from
        # schema/to_ena.json rather than from self.source_json; that choice
        # is kept here - confirm which file is intended.
        json_data = self._load_json("schema", "to_ena.json")
        fixed_data = self._load_json("conf", "configuration.json")["fixed_data"]
        sample_label = str(json_data["sample_name"])

        root = ET.Element("SAMPLE_SET")
        sample = ET.SubElement(root, "SAMPLE")
        sample.set("alias", "Programmatic Test SARS-CoV-2 Sample" + sample_label)
        ET.SubElement(sample, "TITLE").text = "SARS-CoV-2 Sample" + sample_label
        sample_name = ET.SubElement(sample, "SAMPLE_NAME")
        # Element text must be a string for serialisation; JSON may hold ints.
        ET.SubElement(sample_name, "TAXON_ID").text = self._as_text(
            fixed_data["tax_id"]
        )
        ET.SubElement(sample_name, "SCIENTIFIC_NAME").text = self._as_text(
            fixed_data["scientific_name"]
        )
        ET.SubElement(sample, "DESCRIPTION").text = "SARS-CoV-2 Sample" + sample_label
        sample_attributes = ET.SubElement(sample, "SAMPLE_ATTRIBUTES")
        for tag, value in json_data.items():
            sample_attribute = ET.SubElement(sample_attributes, "SAMPLE_ATTRIBUTE")
            ET.SubElement(sample_attribute, "TAG").text = str(tag)
            ET.SubElement(sample_attribute, "VALUE").text = self._as_text(value)

        samples_xml = os.path.join(samples_dir, "samples_relecov.xml")
        ET.ElementTree(root).write(samples_xml)

        # TODO: 2.2 Upload samples info (same never-executed `requests.post`
        # draft as for the study upload; placeholder endpoint and dummy
        # credentials only).
        # TODO: 3. From sftp upload runs (FASTQ files programmatic) -
        #          experiments.xml, runs.xml and submission.xml
        # TODO: 4. From sftp upload sequences (FASTA files programmatic) -
        #          json using webin-cli-rest
        return samples_xml

relecov_tools/example_data/ena_upload.json

+2-2
Original file line number | Diff line number | Diff line change
@@ -16,7 +16,7 @@
1616
"study_abstract_1",
1717
"study_abstract_2"
1818
],
19-
"pubmed_id": [None,
19+
"pubmed_id": ["None",
2020
"pubmed_id_2"
2121
]
2222
},
@@ -157,7 +157,7 @@
157157
"single"
158158
],
159159
"insert_size": [
160-
"250",None,None
160+
"250","None","None"
161161
],
162162
"library_construction_protocol": [
163163
"library_construction_protocol_1",

0 commit comments

Comments
 (0)