Skip to content

Commit d2b0834

Browse files
committed
Implement sub html report for each barcode*
*for each barcode found in the sample sheet
1 parent f765393 commit d2b0834

File tree

1 file changed

+81
-14
lines changed

1 file changed

+81
-14
lines changed

sequana_pipelines/nanomerge/nanomerge.rules

+81-14
Original file line numberDiff line numberDiff line change
@@ -83,11 +83,19 @@ expected_fastqs = expand("./{project}/{sample}.fastq.gz", zip, project=samples.d
8383
sample=samples.df['sample'])
8484

8585

86-
rule pipeline:
87-
input:
88-
expected_fastqs,
89-
svg = ".sequana/rulegraph.svg",
90-
html="summary.html"
86+
# Final target rule of the workflow.
#
# The per-barcode pycoQC sub-reports can only be requested when the sample
# sheet provides a "barcode" column: expanding over samples.df.barcode
# without that column raises an AttributeError while the workflow is parsed.
# NOTE(review): the sub-reports also need the pycoqc_per_barcode rule to be
# defined; confirm it is not disabled by another config switch.
if "barcode" in samples.df.columns:
    rule pipeline:
        input:
            expected_fastqs,
            svg = ".sequana/rulegraph.svg",
            html="summary.html",
            subsummary=expand("pycoqc/{barcode}_summary.html", barcode=samples.df.barcode.values)
else:
    # No barcode information: only the merged fastq files, the rulegraph
    # and the main summary page are requested.
    rule pipeline:
        input:
            expected_fastqs,
            svg = ".sequana/rulegraph.svg",
            html="summary.html",
9199

92100

93101
def get_input_merge(wildcards):
@@ -152,7 +160,6 @@ if config["summary"]:
152160
else:
153161
subdf.to_csv(output[0], header=False, mode="a+", index=False, sep='\t')
154162

155-
156163
rule pyco:
157164
input:
158165
"sub_sample_summary/summary.txt"
@@ -169,6 +176,49 @@ if config["summary"]:
169176
pycoQC --summary_file {input} -o {output} {params.options} > {log} 2>&1
170177
"""
171178

179+
if "barcode" in samples.df.columns:
180+
# Split the global summary table into one tab-separated file per barcode
# listed in the sample sheet, selecting rows on their "alias" column.
rule split_barcode:
    input: "sub_sample_summary/summary.txt"
    output: expand("sub_sample_summary/{barcode}_summary.txt", barcode=samples.df.barcode)
    run:
        import pandas as pd

        # Barcodes that have not been written yet: their first write must
        # create/truncate the file and emit the column header.
        pending = set(samples.df.barcode)

        # The summary is read by chunks of 100000 rows instead of being
        # loaded in memory at once.
        with pd.read_csv(input[0], chunksize=100000, sep="\t") as reader:
            for chunk in reader:
                for barcode in samples.df.barcode:
                    selection = chunk.query("alias==@barcode")
                    if not len(selection):
                        continue

                    filename = f"sub_sample_summary/{barcode}_summary.txt"
                    first_write = barcode in pending
                    selection.to_csv(
                        filename,
                        header=first_write,
                        mode="w" if first_write else "a+",
                        index=False,
                        sep='\t',
                    )
                    pending.discard(barcode)
202+
203+
# Build one pycoQC HTML report per barcode from the matching per-barcode
# summary table found in sub_sample_summary/.
# Both stdout and stderr of pycoQC are redirected to the rule's log file.
rule pycoqc_per_barcode:
    input:
        "sub_sample_summary/{barcode}_summary.txt"
    output:
        "pycoqc/{barcode}_summary.html"
    params:
        options=config["pycoqc"]["options"]
    log:
        "pycoqc/{barcode}.log"
    container:
        config["apptainers"]["pycoqc"]
    shell:
        """
        pycoQC --summary_file {input} -o {output} {params.options} > {log} 2>&1
        """
218+
219+
220+
221+
172222

173223
rule merge:
174224
input:
@@ -204,7 +254,6 @@ rule dot2svg:
204254
shell:
205255
"""dot -Tsvg {input} -o {output}"""
206256

207-
rule get_stats:
208257

209258

210259
rule html_report:
@@ -221,10 +270,6 @@ rule html_report:
221270
"rulegraph": ".sequana/rulegraph.svg",
222271
"pipeline_version": nanomerge.version}
223272

224-
manager.teardown(extra_files_to_remove=["pyco/pyco.log", "pyco/pyco.html"])
225-
226-
227-
228273
dirs = ",".join([f'<a href="{x}/">{x}</a>' for x in samples.get_projects()])
229274

230275
def get_stats():
@@ -264,8 +309,21 @@ rule html_report:
264309
"N50": [N50s[k] for k in sorted(sample_names.keys())],
265310
"mean read length": [mus[k] for k in sorted(sample_names.keys())],
266311
"Number of reads": [nreads[k] for k in sorted(sample_names.keys())]
267-
},
268-
index=sample_names)
312+
}
313+
)
314+
315+
# add a column with potential links to the per-barcode sub html reports
316+
if "barcode" in samples.df.columns:
317+
links = []
318+
319+
for barcode in df.barcodes.values:
320+
if barcode in samples.df.barcode.values:
321+
links.append(f"pycoqc/{barcode}_summary.html")
322+
else:
323+
links.append("")
324+
df['link'] = links
325+
df = df.rename({"index": "sample"}, axis=1)
326+
269327

270328
total_reads = sum([nreads[k] for k in nreads.keys()])
271329

@@ -276,6 +334,11 @@ rule html_report:
276334
'bSort': 'true',
277335
'dom':"RSPrtp"
278336
}
337+
338+
# add link to the sub html report
339+
if "barcode" in samples.df.columns:
340+
datatable.datatable.set_links_to_column('link', 'sample')
341+
279342
js = datatable.create_javascript_function()
280343
htmltable = datatable.create_datatable()
281344

@@ -290,7 +353,7 @@ rule html_report:
290353
s = next(FastA(input[0]))
291354
try:
292355
model = [x.split("=")[1] for x in s.comment.split() if "model_version_id" in x][0]
293-
except IndexError:
356+
except (IndexError, AttributeError):
294357
model = "unknown"
295358
return model
296359

@@ -328,6 +391,10 @@ rule html_report:
328391
onsuccess:
329392

330393
print("Once done, please clean up the directory using\n'make clean'")
394+
395+
manager.teardown(extra_files_to_remove=["pyco/pyco.log", "pyco/pyco.html"])
396+
397+
331398
shell("chmod -R g+w .")
332399
shell("rm -rf pyco rulegraph")
333400

0 commit comments

Comments
 (0)