@@ -83,11 +83,19 @@ expected_fastqs = expand("./{project}/{sample}.fastq.gz", zip, project=samples.d
83
83
sample=samples.df['sample'])
84
84
85
85
86
- rule pipeline:
87
- input:
88
- expected_fastqs,
89
- svg = ".sequana/rulegraph.svg",
90
- html="summary.html"
86
# Default target rule of the workflow.
#
# The per-barcode pycoQC reports can only be requested when the sample sheet
# provides a "barcode" column: the expand() below reads samples.df.barcode,
# which does not exist otherwise. The two variants are therefore selected on
# the presence of that column.
#
# NOTE(review): the rules producing pycoqc/{barcode}_summary.html appear to be
# declared only when the pycoQC summary step is enabled -- confirm that the
# barcode variant is never selected with that step disabled.
if "barcode" in samples.df.columns:
    rule pipeline:
        input:
            expected_fastqs,
            html="summary.html",
            svg=".sequana/rulegraph.svg",
            # one pycoQC HTML report per barcode listed in the sample sheet
            subsummary=expand("pycoqc/{barcode}_summary.html", barcode=samples.df.barcode.values)
else:
    rule pipeline:
        input:
            expected_fastqs,
            svg=".sequana/rulegraph.svg",
            html="summary.html",
91
99
92
100
93
101
def get_input_merge(wildcards):
@@ -152,7 +160,6 @@ if config["summary"]:
152
160
else:
153
161
subdf.to_csv(output[0], header=False, mode="a+", index=False, sep='\t')
154
162
155
-
156
163
rule pyco:
157
164
input:
158
165
"sub_sample_summary/summary.txt"
@@ -169,6 +176,49 @@ if config["summary"]:
169
176
pycoQC --summary_file {input} -o {output} {params.options} > {log} 2>&1
170
177
"""
171
178
179
if "barcode" in samples.df.columns:

    # Distinct barcodes, in sample-sheet order. De-duplicating guarantees that
    # every per-barcode file is declared once and that no row is written twice.
    _barcodes = list(dict.fromkeys(samples.df.barcode))

    # Split the global sequencing summary into one tab-separated file per
    # barcode, selecting rows whose "alias" column equals the barcode.
    rule split_barcode:
        input: "sub_sample_summary/summary.txt"
        output: expand("sub_sample_summary/{barcode}_summary.txt", barcode=_barcodes)
        run:
            import pandas as pd

            # barcode -> declared output file (expand() keeps the order of _barcodes)
            destinations = dict(zip(_barcodes, output))

            # barcodes whose file has not been started yet; the first write
            # creates the file with its header, later writes only append rows
            not_started = set(destinations)

            # the summary can be large: stream it by chunks of 100000 rows
            with pd.read_csv(input[0], chunksize=100000, sep="\t") as reader:
                for chunk in reader:
                    # a single pass over the chunk instead of one scan per barcode
                    for alias, subdf in chunk.groupby("alias", sort=False):
                        if alias not in destinations:
                            continue
                        first_write = alias in not_started
                        subdf.to_csv(
                            destinations[alias],
                            header=first_write,
                            mode="w" if first_write else "a",
                            index=False,
                            sep="\t",
                        )
                        not_started.discard(alias)

            # A barcode without any row would leave its declared output missing;
            # fail here with an explicit message naming the barcodes.
            if not_started:
                missing = ", ".join(sorted(str(x) for x in not_started))
                raise ValueError(
                    f"No entries found in {input[0]} for barcode(s): {missing}"
                )

    # Build one pycoQC HTML report per barcode from its own summary file.
    rule pycoqc_per_barcode:
        input:
            "sub_sample_summary/{barcode}_summary.txt"
        output:
            "pycoqc/{barcode}_summary.html"
        log:
            "pycoqc/{barcode}.log"
        params:
            options=config["pycoqc"]["options"]
        container:
            config["apptainers"]["pycoqc"]
        shell:
            """
            pycoQC --summary_file {input} -o {output} {params.options} > {log} 2>&1
            """
218
+
219
+
220
+
221
+
172
222
173
223
rule merge:
174
224
input:
@@ -204,7 +254,6 @@ rule dot2svg:
204
254
shell:
205
255
"""dot -Tsvg {input} -o {output}"""
206
256
207
- rule get_stats:
208
257
209
258
210
259
rule html_report:
@@ -221,10 +270,6 @@ rule html_report:
221
270
"rulegraph": ".sequana/rulegraph.svg",
222
271
"pipeline_version": nanomerge.version}
223
272
224
- manager.teardown(extra_files_to_remove=["pyco/pyco.log", "pyco/pyco.html"])
225
-
226
-
227
-
228
273
dirs = ",".join([f'<a href="{x}/">{x}</a>' for x in samples.get_projects()])
229
274
230
275
def get_stats():
@@ -264,8 +309,21 @@ rule html_report:
264
309
"N50": [N50s[k] for k in sorted(sample_names.keys())],
265
310
"mean read length": [mus[k] for k in sorted(sample_names.keys())],
266
311
"Number of reads": [nreads[k] for k in sorted(sample_names.keys())]
267
- },
268
- index=sample_names)
312
+ }
313
+ )
314
+
315
+ # add a column with links to the per-barcode pycoQC reports (empty string when there is none)
316
+ if "barcode" in samples.df.columns:
317
+ links = []
318
+
319
+ for barcode in df.barcodes.values:
320
+ if barcode in samples.df.barcode.values:
321
+ links.append(f"pycoqc/{barcode}_summary.html")
322
+ else:
323
+ links.append("")
324
+ df['link'] = links
325
+ df = df.rename({"index": "sample"}, axis=1)
326
+
269
327
270
328
total_reads = sum([nreads[k] for k in nreads.keys()])
271
329
@@ -276,6 +334,11 @@ rule html_report:
276
334
'bSort': 'true',
277
335
'dom':"RSPrtp"
278
336
}
337
+
338
+ # add link to the sub html report
339
+ if "barcode" in samples.df.columns:
340
+ datatable.datatable.set_links_to_column('link', 'sample')
341
+
279
342
js = datatable.create_javascript_function()
280
343
htmltable = datatable.create_datatable()
281
344
@@ -290,7 +353,7 @@ rule html_report:
290
353
s = next(FastA(input[0]))
291
354
try:
292
355
model = [x.split("=")[1] for x in s.comment.split() if "model_version_id" in x][0]
293
- except IndexError:
356
+ except ( IndexError, AttributeError) :
294
357
model = "unknown"
295
358
return model
296
359
@@ -328,6 +391,10 @@ rule html_report:
328
391
onsuccess:
    # Runs once the workflow has completed successfully: tell the user how to
    # clean up, let the manager finalise, then fix permissions and drop the
    # temporary directories.
    print("Once done, please clean up the directory using\n'make clean'")

    leftovers = ["pyco/pyco.log", "pyco/pyco.html"]
    manager.teardown(extra_files_to_remove=leftovers)

    # order matters: open group write access first, then remove scratch dirs
    for command in ("chmod -R g+w .", "rm -rf pyco rulegraph"):
        shell(command)
333
400
0 commit comments