Skip to content

Commit 394f7ad

Browse files
committed
Handle very large number of files using find cmd
1 parent 13e27dc commit 394f7ad

File tree

5 files changed

+15
-9
lines changed

5 files changed

+15
-9
lines changed

.github/workflows/main.yml

-5
Original file line numberDiff line numberDiff line change
@@ -40,11 +40,6 @@ jobs:
4040
# $CONDA is an environment variable pointing to the root of the miniconda directory
4141
echo $CONDA/bin >> $GITHUB_PATH
4242
43-
#- name: conda
44-
# run: |
45-
# conda install -c conda-forge -c bioconda --quiet -y python=${{ matrix.python }} mamba
46-
# mamba install pycoqc
47-
4843
- name: Install dependencies
4944
run: |
5045
pip install pycoQC

README.rst

+2
Original file line numberDiff line numberDiff line change
@@ -147,6 +147,8 @@ Changelog
147147
========= ====================================================================
148148
Version Description
149149
========= ====================================================================
150+
1.2.0 * handle large promethion runs by using find+cat instead of just cat
151+
to cope with a very large number of input files.
150152
1.1.0 * add subsample option and set to 1,000,000 reads to handle large
151153
runs such as promethion
152154
1.0.1 * CSV can now handle sample or samplename column name in samplesheet.

requirements.txt

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
sequana_pipetools>=0.8.1
1+
sequana_pipetools>=0.12.2
22
sequana

sequana_pipelines/nanomerge/nanomerge.rules

+11-2
Original file line numberDiff line numberDiff line change
@@ -96,6 +96,13 @@ def get_input_merge(wildcards):
9696
filenames = list((input_directory).glob(input_pattern))
9797
return filenames
9898

99+
def get_input_directory(wildcards):
100+
if samples.barcoded:
101+
barcode = samples.get_barcode_from_sample(wildcards.sample)
102+
return input_directory / barcode
103+
else:
104+
return input_directory
105+
99106

100107
if config["summary"]:
101108
rule pyco:
@@ -120,9 +127,11 @@ rule merge:
120127
get_input_merge
121128
output:
122129
"./{project}/{sample}.fastq.gz"
130+
params:
131+
indir=get_input_directory
123132
shell:
124133
"""
125-
cat {input} > {output}
134+
find {params.indir} -type f -name "*fastq.gz" -exec cat {{}} + > {output}
126135
"""
127136

128137

@@ -168,7 +177,7 @@ rule html_report:
168177
pycodata = fout.read()
169178
pycodata = '<div class="columns">' + pycodata.split('<div class="columns">')[-1].replace("</div>\n</body>\n</html>","")
170179

171-
s = SummaryModule2(data, f"Your data are available in {dirs} directories" + pycodata)
180+
s = SummaryModule2(data, f"""Your data are available in {dirs} directories. Please see the summary plots here below (if sequence summary was provided), generated with <a href="https://github.com/a-slide/pycoQC">pycoQC</a> software.""" + pycodata)
172181
else:
173182
s = SummaryModule2(data, f"no summary found. Please checkout the sub directories {dirs}. They should contain your final fastq files for each project.")
174183

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010

1111

1212
_MAJOR = 1
13-
_MINOR = 1
13+
_MINOR = 2
1414
_MICRO = 0
1515
version = f"{_MAJOR}.{_MINOR}.{_MICRO}"
1616
release = f"{_MAJOR}.{_MINOR}"

0 commit comments

Comments
 (0)