nf-core · maxulysse · Dec 12, 2025 · Nov 25, 2025 · Nov 25, 2025 · Nov 26, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -31,18 +31,19 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c
 - [#132](https://github.com/nf-core/seqinspector/pull/132) Added a bwamem2 index params for faster output
 - [#135](https://github.com/nf-core/seqinspector/pull/135) Added index section to MultiQC reports to facilitate report navigation (#125)
 - [#151](https://github.com/nf-core/seqinspector/pull/151) Added a prepare_genome subworkflow to handle bwamem2 indexing
+- [#156](https://github.com/nf-core/seqinspector/pull/156) Added relative sample_size and a warning when a sample has fewer reads than the desired sample_size.
 - [#158](https://github.com/nf-core/seqinspector/pull/158) Moved picard_collectmultiplemetrics to the subworkflow QC_BAM
 - [#159](https://github.com/nf-core/seqinspector/pull/159) Added a subworkflow QC_BAM including picard_collecthsmetrics for alignment QC of hybrid-selection data
 - [#162](https://github.com/nf-core/seqinspector/pull/162) Add tests for prepare_genome subworkflow
 
 ### `Fixed`
 
 - [#71](https://github.com/nf-core/seqinspector/pull/71) FASTQSCREEN does not fail when multiple reads are provided.
+- [#94](https://github.com/nf-core/seqinspector/issues/94) Go through and validate test data
 - [#99](https://github.com/nf-core/seqinspector/pull/99) Fix group reports for paired reads
 - [#107](https://github.com/nf-core/seqinspector/pull/107) Put SeqFU-stats section reports together
 - [#112](https://github.com/nf-core/seqinspector/pull/112) Making fastq_screen_references value to use parentDir
 - [#121](https://github.com/nf-core/seqinspector/pull/121) Cleanup sample naming for MultiQC report (#105)
-- [#94] (https://github.com/nf-core/seqinspector/issues/94) Go through and validate test data
 - [#162](https://github.com/nf-core/seqinspector/pull/162) Fix bugs in qc_bam and prepare_genome subworkflows and add tests
 - [#163](https://github.com/nf-core/seqinspector/pull/163) Run fastqscreen with subsampled data if available
 

diff --git a/docs/usage.md b/docs/usage.md
@@ -98,7 +98,7 @@ genome: 'GRCh37'
 
 You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
 
-Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. Note that it refers to an absolute number.
+Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. Both absolute numbers (e.g. 100) and relative numbers (e.g. 0.25) can be specified.
 
 ```bash
 nextflow run nf-core/seqinspector --input ./samplesheet.csv --outdir ./results --sample_size 1000000 -profile docker

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -24,9 +24,9 @@
                     "fa_icon": "fas fa-file-csv"
                 },
                 "sample_size": {
-                    "type": "integer",
-                    "description": "Take this number of reads as a subset.",
-                    "help_text": "Choose the size of the subset or 0, if no subsampling shall be performed. Note that it refers to an absolute number.",
+                    "type": "number",
+                    "description": "Take a subset of reads for analysis.",
+                    "help_text": "The subset can be given as a fraction of reads (e.g. 0.20) or as an absolute number of reads per sample (integer). The pipeline will still run if a sample has fewer reads than the selected subset value.",
                     "default": 0
                 },
                 "outdir": {

diff --git a/tests/NovaSeq6000.main_subsample.nf.test b/tests/NovaSeq6000.main_subsample.nf.test
@@ -1,6 +1,6 @@
 nextflow_pipeline {
 
-    name "Test Workflow main.nf on NovaSeq6000 data sample size 90"
+    name "Test Workflow main.nf on NovaSeq6000 data with different sample sizes"
     script "../main.nf"
     tag "seqinspector"
     tag "PIPELINE"
@@ -38,4 +38,77 @@ nextflow_pipeline {
             )
         }
     }
+
+    test("NovaSeq6000 data test relative sample size") {
+        // Runs the pipeline with a fractional sample_size (0.9) and snapshots the resulting outputs.
+        when {
+            config "./NovaSeq6000.main_subsample.nf.test.config"
+            params {
+                outdir = "$outputDir"
+                sample_size = 0.9
+            }
+        }
+
+        then {
+            // stable_name: relative names of all files + folders in ${params.outdir}/ (pipeline_info html/json/txt reports are ignored)
+            def stable_name = getAllFilesFromDir(
+                params.outdir,
+                relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']
+            )
+            // stable_path: all files in ${params.outdir}/ with stable content (exclusions are listed in tests/.nftignore)
+            def stable_path = getAllFilesFromDir(
+                params.outdir,
+                ignoreFile: 'tests/.nftignore'
+            )
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(
+                    // pipeline versions.yml file for multiqc, with the Nextflow version removed because we test pipelines on multiple Nextflow versions
+                    removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqinspector_software_mqc_versions.yml"),
+                    // All stable path names, with a relative path
+                    stable_name,
+                    // All files with stable contents
+                    stable_path
+                ).match() }
+            )
+        }
+    }
+
+    test("NovaSeq6000 data test sample size exceeds available reads") {
+        tag "warning"
+        // Requests an absolute sample_size of 120 and snapshots the outputs plus the log lines mentioning that request.
+        when {
+            config "./NovaSeq6000.main_subsample.nf.test.config"
+            params {
+                outdir = "$outputDir"
+                sample_size = 120
+            }
+        }
+
+        then {
+            // stable_name: relative names of all files + folders in ${params.outdir}/ (pipeline_info html/json/txt reports are ignored)
+            def stable_name = getAllFilesFromDir(
+                params.outdir,
+                relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']
+            )
+            // stable_path: all files in ${params.outdir}/ with stable content (exclusions are listed in tests/.nftignore)
+            def stable_path = getAllFilesFromDir(
+                params.outdir,
+                ignoreFile: 'tests/.nftignore'
+            )
+            assert workflow.success
+            assertAll(
+                { assert snapshot(
+                    // pipeline versions.yml file for multiqc, with the Nextflow version removed because we test pipelines on multiple Nextflow versions
+                    removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqinspector_software_mqc_versions.yml"),
+                    // All stable path names, with a relative path
+                    stable_name,
+                    // All files with stable contents
+                    stable_path,
+                    // stdout/stderr messages containing 'Requested sample_size (120)' — NOTE(review): confirm whether include entries match literally or as regex (parentheses would then need escaping)
+                    filterNextflowOutput(workflow.stdout + workflow.stderr, ignore: ['Downloading plugin'], include:['Requested sample_size (120)'])
+                ).match() }
+            )
+        }
+    }
 }