nf-core · maxulysse · Dec 12, 2025 · Nov 25, 2025 · Nov 25, 2025 · Nov 26, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -30,6 +30,7 @@ Initial release of nf-core/seqinspector, created with the [nf-core](https://nf-c
 - [#128](https://github.com/nf-core/seqinspector/pull/128) Added Picard tools - Collect Multiple Mterics to collect QC metrics
 - [#132](https://github.com/nf-core/seqinspector/pull/132) Added a bwamem2 index params for faster output
 - [#151](https://github.com/nf-core/seqinspector/pull/151) Added a prepare_genome subworkflow to handle bwamem2 indexing
+- [#156](https://github.com/nf-core/seqinspector/pull/156) Added support for a relative `sample_size` and a warning when a sample has fewer reads than the requested `sample_size`.
 
 ### `Fixed`
 

diff --git a/docs/usage.md b/docs/usage.md
@@ -98,7 +98,7 @@ genome: 'GRCh37'
 
 You can also generate such `YAML`/`JSON` files via [nf-core/launch](https://nf-co.re/launch).
 
-Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. Note that it refers to an absolute number.
+Optionally, the `sample_size` parameter allows you to subset a random number of reads to be analysed. Both absolute numbers (e.g. 100) and relative fractions (e.g. 0.25) can be specified.
 
 ```bash
 nextflow run nf-core/seqinspector --input ./samplesheet.csv --outdir ./results --sample_size 1000000 -profile docker

diff --git a/nextflow_schema.json b/nextflow_schema.json
@@ -24,14 +24,13 @@
                     "fa_icon": "fas fa-file-csv"
                 },
                 "sample_size": {
-                    "type": "integer",
-                    "description": "Take this number of reads as a subset.",
-                    "help_text": "Choose the size of the subset or 0, if no subsampling shall be performed. Note that it refers to an absolute number.",
+                    "type": "number",
+                    "description": "Take a subset of reads for analysis.",
+                    "help_text": "The subset can be given as a fraction of reads (e.g. 0.20) or as an absolute number of reads per sample (integer). The pipeline will still run if a sample has fewer reads than the selected subset value.",
                     "default": 0
                 },
                 "outdir": {
                     "type": "string",
-                    "default": null,
                     "format": "directory-path",
                     "description": "The output directory where the results will be saved. You have to use absolute paths to storage on Cloud infrastructure.",
                     "fa_icon": "fas fa-folder-open"

diff --git a/tests/NovaSeq6000.main_subsample.nf.test b/tests/NovaSeq6000.main_subsample.nf.test
@@ -1,6 +1,6 @@
 nextflow_pipeline {
 
-    name "Test Workflow main.nf on NovaSeq6000 data sample size 90"
+    name "Test Workflow main.nf on NovaSeq6000 data with different sample sizes"
     script "../main.nf"
     tag "seqinspector"
     tag "PIPELINE"
@@ -38,4 +38,64 @@ nextflow_pipeline {
             )
         }
     }
+
+    test("NovaSeq6000 data test relative sample size") {
+
+        when {
+            config "./NovaSeq6000.main_subsample.nf.test.config"
+            params {
+                outdir = "$outputDir"
+                sample_size = 0.9
+            }
+        }
+
+        then {
+            // stable_name: All files + folders in ${params.outdir}/ with a stable name
+            def stable_name = getAllFilesFromDir(
+                params.outdir,
+                relative: true, includeDir: true, ignore: ['pipeline_info/*.{html,json,txt}']
+            )
+            // stable_path: All files in ${params.outdir}/ with stable content
+            def stable_path = getAllFilesFromDir(
+                params.outdir,
+                ignoreFile: 'tests/.nftignore'
+            )
+            assertAll(
+                { assert workflow.success},
+                { assert snapshot(
+                    // pipeline versions.yml file for multiqc, from which the Nextflow version is removed because we test pipelines on multiple Nextflow versions
+                    removeNextflowVersion("$outputDir/pipeline_info/nf_core_seqinspector_software_mqc_versions.yml"),
+                    // All stable path names, with a relative path
+                    stable_name,
+                    // All files with stable contents
+                    stable_path
+                ).match() }
+            )
+        }
+    }
+    test("NovaSeq6000 data test sample size exceeds available reads") {
+
+        when {
+            config "./NovaSeq6000.main_subsample.nf.test.config"
+            params {
+                outdir = "$outputDir"
+                sample_size = 120
+            }
+        }
+
+        then {
+            def warn_messages = [
+                "Warning: Requested sample_size (120) is larger than available reads in Undetermined_5 (100). Pipeline will continue with 100 reads.",
+                "Warning: Requested sample_size (120) is larger than available reads in SampleA_2 (100). Pipeline will continue with 100 reads.",
+                "Warning: Requested sample_size (120) is larger than available reads in sampletest_4 (100). Pipeline will continue with 100 reads.",
+                "Warning: Requested sample_size (120) is larger than available reads in Sample23_3 (100). Pipeline will continue with 100 reads.",
+                "Warning: Requested sample_size (120) is larger than available reads in Sample1_1 (100). Pipeline will continue with 100 reads."
+            ]
+            assert workflow.success
+            for ( msg in warn_messages ) {
+                // Check stdout for each expected warning, since nf-test does not capture log messages
+                assert workflow.stdout.contains(msg)
+            }
+        }
+    }
 }