From fe6a8126a6a3b5b7d749856304258c42db10719a Mon Sep 17 00:00:00 2001 From: meesters Date: Fri, 14 Nov 2025 12:12:46 +0100 Subject: [PATCH 01/17] fix: extracting job id from convoluted output, if necessary --- snakemake_executor_plugin_slurm/__init__.py | 6 ++-- snakemake_executor_plugin_slurm/validation.py | 34 +++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py index 583b0773..4d48e06e 100644 --- a/snakemake_executor_plugin_slurm/__init__.py +++ b/snakemake_executor_plugin_slurm/__init__.py @@ -35,7 +35,7 @@ ) from .efficiency_report import create_efficiency_report from .submit_string import get_submit_command -from .validation import validate_slurm_extra +from .validation import validate_slurm_job_id, validate_slurm_extra @dataclass @@ -382,8 +382,10 @@ def run_job(self, job: JobExecutorInterface): # To extract the job id we split by semicolon and take the first element # (this also works if no cluster name was provided) slurm_jobid = out.strip().split(";")[0] + # this slurm_jobid might be wrong: some cluster admin give convulted + # sbatch outputs. So we need to validate it properly: if not slurm_jobid: - raise WorkflowError("Failed to retrieve SLURM job ID from sbatch output.") + slurm_jobid = validate_slurm_job_id(slurm_jobidout) slurm_logfile = slurm_logfile.with_name( slurm_logfile.name.replace("%j", slurm_jobid) ) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 626a90a4..c2e6a007 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -3,9 +3,43 @@ """ import re +import subprocess from snakemake_interface_common.exceptions import WorkflowError +def validate_slurm_job_id(job_id, output): + """ + Validate that the SLURM job ID is a positive integer. + + Args: + job_id (str): The SLURM job ID to validate. + + Raises: + WorkflowError: If the job ID is not a positive integer or we cannot + determine a valid job ID from the given input string. + """ + if re.match(r"^\d+$", job_id): + return job_id + else: + # try matching a positive integer, raise an error if more than one match or no match found + # Match standalone integers, excluding those followed by %, letters, or digits (units/percentages/floats) + # Allows format: "1234" or "1234; clustername" (SLURM multi-cluster format) + + # If the first attempt to validate the job fails, try parsing the sbatch output + # a bit more sophisticatedly: + matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output) + if len(matches) == 1: + return matches[0] + elif len(matches) > 1: + raise WorkflowError( + f"Multiple possible SLURM job IDs found in: {output}. Was looking for exactly one positive integer." + ) + elif not matches: + raise WorkflowError( + f"No valid SLURM job ID found in: {output}. Was looking for exactly one positive integer." + ) + + def get_forbidden_slurm_options(): """ Return a dictionary of forbidden SLURM options that the executor manages. From 44249fe87d838dc8399f5016b1b55dbfe549e305 Mon Sep 17 00:00:00 2001 From: meesters Date: Fri, 14 Nov 2025 12:15:10 +0100 Subject: [PATCH 02/17] fix: syntax typo --- snakemake_executor_plugin_slurm/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py index 4d48e06e..cbd4e847 100644 --- a/snakemake_executor_plugin_slurm/__init__.py +++ b/snakemake_executor_plugin_slurm/__init__.py @@ -385,7 +385,7 @@ def run_job(self, job: JobExecutorInterface): # this slurm_jobid might be wrong: some cluster admin give convulted # sbatch outputs. So we need to validate it properly: if not slurm_jobid: - slurm_jobid = validate_slurm_job_id(slurm_jobidout) + slurm_jobid = validate_slurm_job_id(slurm_jobid, out) slurm_logfile = slurm_logfile.with_name( slurm_logfile.name.replace("%j", slurm_jobid) ) From 7b66597e588464dac49c7472d88392222df3333c Mon Sep 17 00:00:00 2001 From: meesters Date: Fri, 14 Nov 2025 13:45:34 +0100 Subject: [PATCH 03/17] fix: removed sunused import --- snakemake_executor_plugin_slurm/validation.py | 1 - 1 file changed, 1 deletion(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index c2e6a007..c6a79267 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -3,7 +3,6 @@ """ import re -import subprocess from snakemake_interface_common.exceptions import WorkflowError From 44a55071cf6ff527991b844f52a87e5d068dba31 Mon Sep 17 00:00:00 2001 From: meesters Date: Fri, 14 Nov 2025 13:54:26 +0100 Subject: [PATCH 04/17] feat: included black configuration! --- pyproject.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pyproject.toml b/pyproject.toml index 9d6d90c0..f95bcd21 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,9 @@ pandas = "^2.2.3" [tool.coverage.run] omit = [".*", "*/site-packages/*", "Snakefile"] +[tool.black] +line-length = 88 + [build-system] requires = ["poetry-core"] build-backend = "poetry.core.masonry.api" From b9d38fbd3ff87fa2200e38ea2db6b7fb9e52fb93 Mon Sep 17 00:00:00 2001 From: meesters Date: Fri, 14 Nov 2025 13:54:46 +0100 Subject: [PATCH 05/17] fix: line length (formatting)! --- snakemake_executor_plugin_slurm/validation.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index c6a79267..52957257 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -20,22 +20,27 @@ def validate_slurm_job_id(job_id, output): if re.match(r"^\d+$", job_id): return job_id else: - # try matching a positive integer, raise an error if more than one match or no match found - # Match standalone integers, excluding those followed by %, letters, or digits (units/percentages/floats) - # Allows format: "1234" or "1234; clustername" (SLURM multi-cluster format) + # Try matching a positive integer, raise an error if more than one match or + # no match found. Match standalone integers, excluding those followed by %, letters, + # or digits (units/percentages/floats). Allows format: "1234" or + # "1234; clustername" (SLURM multi-cluster format). # If the first attempt to validate the job fails, try parsing the sbatch output # a bit more sophisticatedly: - matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output) + matches = re.findall( + r"\b\d+(?![%A-Za-z\d.])", output + ) if len(matches) == 1: return matches[0] elif len(matches) > 1: raise WorkflowError( - f"Multiple possible SLURM job IDs found in: {output}. Was looking for exactly one positive integer." + f"Multiple possible SLURM job IDs found in: {output}. " + "Was looking for exactly one positive integer." ) elif not matches: raise WorkflowError( - f"No valid SLURM job ID found in: {output}. Was looking for exactly one positive integer." + f"No valid SLURM job ID found in: {output}. " + "Was looking for exactly one positive integer." ) From 0c0c757a7cb269ef7ec623b27570aad4341d08ef Mon Sep 17 00:00:00 2001 From: Christian Meesters Date: Fri, 14 Nov 2025 15:57:59 +0000 Subject: [PATCH 06/17] Update snakemake_executor_plugin_slurm/validation.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- snakemake_executor_plugin_slurm/validation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 52957257..6ef30fe8 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -12,6 +12,7 @@ def validate_slurm_job_id(job_id, output): Args: job_id (str): The SLURM job ID to validate. + output (str): The full sbatch output to parse if job_id is invalid. Raises: WorkflowError: If the job ID is not a positive integer or we cannot From 40783f88138d3c80724b95fa7116c1b6324b22e5 Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 12:13:57 +0100 Subject: [PATCH 07/17] fix: unconditional JOBID validation and renamed the validator function --- snakemake_executor_plugin_slurm/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py index cbd4e847..617e2a7e 100644 --- a/snakemake_executor_plugin_slurm/__init__.py +++ b/snakemake_executor_plugin_slurm/__init__.py @@ -35,7 +35,7 @@ ) from .efficiency_report import create_efficiency_report from .submit_string import get_submit_command -from .validation import validate_slurm_job_id, validate_slurm_extra +from .validation import validate_or_get_slurm_job_id, validate_slurm_extra @dataclass @@ -383,9 +383,9 @@ def run_job(self, job: JobExecutorInterface): # (this also works if no cluster name was provided) slurm_jobid = out.strip().split(";")[0] # this slurm_jobid might be wrong: some cluster admin give convulted - # sbatch outputs. So we need to validate it properly: - if not slurm_jobid: - slurm_jobid = validate_slurm_job_id(slurm_jobid, out) + # sbatch outputs. So we need to validate it properly (and replace it + # if necessary). + slurm_jobid = validate_or_get_slurm_job_id(slurm_jobid, out) slurm_logfile = slurm_logfile.with_name( slurm_logfile.name.replace("%j", slurm_jobid) ) From 40a2d8215111396ef7dd912207b8fc73dc27fd1a Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 12:14:24 +0100 Subject: [PATCH 08/17] fix: renamed the validator function - added regex comments --- snakemake_executor_plugin_slurm/validation.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 52957257..2a3e5c37 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -6,7 +6,7 @@ from snakemake_interface_common.exceptions import WorkflowError -def validate_slurm_job_id(job_id, output): +def validate_or_get_slurm_job_id(job_id, output): """ Validate that the SLURM job ID is a positive integer. @@ -17,6 +17,9 @@ def validate_slurm_job_id(job_id, output): WorkflowError: If the job ID is not a positive integer or we cannot determine a valid job ID from the given input string. """ + # this regex just matches a positive integer + # strict validation would require to check for a JOBID with either + # the SLURM database or control daemon. This is too much overhead. if re.match(r"^\d+$", job_id): return job_id else: @@ -26,7 +29,10 @@ def validate_slurm_job_id(job_id, output): # "1234; clustername" (SLURM multi-cluster format). # If the first attempt to validate the job fails, try parsing the sbatch output - # a bit more sophisticatedly: + # a bit more sophisticatedly ( + # The regex below matches standalone positive integers, there has to be a + # word boundary before the number and the number must not be followed by + # a percent sign, letter, digit, or dot): matches = re.findall( r"\b\d+(?![%A-Za-z\d.])", output ) From 8b7fed775686989b0d386755ce852d6d4291538b Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 12:14:59 +0100 Subject: [PATCH 09/17] fix: formatting --- snakemake_executor_plugin_slurm/validation.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 2a3e5c37..4cadbd9c 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -30,12 +30,10 @@ def validate_or_get_slurm_job_id(job_id, output): # If the first attempt to validate the job fails, try parsing the sbatch output # a bit more sophisticatedly ( - # The regex below matches standalone positive integers, there has to be a + # The regex below matches standalone positive integers, there has to be a # word boundary before the number and the number must not be followed by # a percent sign, letter, digit, or dot): - matches = re.findall( - r"\b\d+(?![%A-Za-z\d.])", output - ) + matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output) if len(matches) == 1: return matches[0] elif len(matches) > 1: From 73f341d9a03051fb305710d723b72caa46a9ef03 Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 12:21:07 +0100 Subject: [PATCH 10/17] fix: linting1 --- snakemake_executor_plugin_slurm/validation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 4754b813..c8a2a367 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -25,8 +25,8 @@ def validate_or_get_slurm_job_id(job_id, output): return job_id else: # Try matching a positive integer, raise an error if more than one match or - # no match found. Match standalone integers, excluding those followed by %, letters, - # or digits (units/percentages/floats). Allows format: "1234" or + # no match found. Match standalone integers, excluding those followed by %, + # letters, or digits (units/percentages/floats). Allows format: "1234" or # "1234; clustername" (SLURM multi-cluster format). # If the first attempt to validate the job fails, try parsing the sbatch output From f880423985ce03c26f7099cbe8526d41a9dfd099 Mon Sep 17 00:00:00 2001 From: Christian Meesters Date: Mon, 17 Nov 2025 11:21:55 +0000 Subject: [PATCH 11/17] Update snakemake_executor_plugin_slurm/__init__.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- snakemake_executor_plugin_slurm/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py index 617e2a7e..e2428002 100644 --- a/snakemake_executor_plugin_slurm/__init__.py +++ b/snakemake_executor_plugin_slurm/__init__.py @@ -382,7 +382,7 @@ def run_job(self, job: JobExecutorInterface): # To extract the job id we split by semicolon and take the first element # (this also works if no cluster name was provided) slurm_jobid = out.strip().split(";")[0] - # this slurm_jobid might be wrong: some cluster admin give convulted + # this slurm_jobid might be wrong: some cluster admin give convoluted # sbatch outputs. So we need to validate it properly (and replace it # if necessary). slurm_jobid = validate_or_get_slurm_job_id(slurm_jobid, out) From 2ab4acffefa60a34e1bf2beb958272c387d940b1 Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 12:22:47 +0100 Subject: [PATCH 12/17] fix: formatting - a f****** white space --- snakemake_executor_plugin_slurm/validation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index c8a2a367..8e57d33d 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -25,7 +25,7 @@ def validate_or_get_slurm_job_id(job_id, output): return job_id else: # Try matching a positive integer, raise an error if more than one match or - # no match found. Match standalone integers, excluding those followed by %, + # no match found. Match standalone integers, excluding those followed by %, # letters, or digits (units/percentages/floats). Allows format: "1234" or # "1234; clustername" (SLURM multi-cluster format). From 9c21a5d0cf843dcf1cef8605a9bacdb40c0fafed Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 14:14:46 +0100 Subject: [PATCH 13/17] feat: extended the jobid regex to be more stable (rejecting matching units) --- snakemake_executor_plugin_slurm/validation.py | 21 ++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 8e57d33d..7f689a33 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -30,11 +30,22 @@ def validate_or_get_slurm_job_id(job_id, output): # "1234; clustername" (SLURM multi-cluster format). # If the first attempt to validate the job fails, try parsing the sbatch output - # a bit more sophisticatedly ( - # The regex below matches standalone positive integers, there has to be a - # word boundary before the number and the number must not be followed by - # a percent sign, letter, digit, or dot): - matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output) + # a bit more sophisticatedly. + # The regex below matches standalone positive integers with a word boundary + # before the number. The number must NOT be: + # - Part of a decimal number (neither before nor after the dot) + # - Followed by a percent sign with optional space (23% or 23 %) + # - Followed by units/counts with optional space: + # * Memory units: k, K, m, M, g, G, kiB, KiB, miB, MiB, giB, GiB + # * Resource counts: files, cores, hours, cpus/CPUs (case-insensitive) + # * minutes are excluded, because of the match to 'm' for Megabytes + # Units must be followed by whitespace, hyphen, period, or end of string + # Use negative lookbehind to exclude digits after a dot, and negative lookahead + # to exclude digits before a dot or followed by units/percent + matches = re.findall( + r"(? 1: From aaf326416dfdb63361494c724129f0f80699bd5e Mon Sep 17 00:00:00 2001 From: meesters Date: Mon, 17 Nov 2025 14:16:05 +0100 Subject: [PATCH 14/17] test: extended test range for this PR --- tests/tests.py | 224 ++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 223 insertions(+), 1 deletion(-) diff --git a/tests/tests.py b/tests/tests.py index 6dc47098..11de6bf2 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -15,7 +15,10 @@ ) from snakemake_executor_plugin_slurm.utils import set_gres_string from snakemake_executor_plugin_slurm.submit_string import get_submit_command -from snakemake_executor_plugin_slurm.validation import validate_slurm_extra +from snakemake_executor_plugin_slurm.validation import ( + validate_slurm_extra, + validate_or_get_slurm_job_id, +) from snakemake_interface_common.exceptions import WorkflowError import pandas as pd @@ -863,3 +866,222 @@ def test_multiple_forbidden_options(self, mock_job): # Should raise error for job-name (first one encountered) with pytest.raises(WorkflowError, match=r"job-name.*not allowed"): validate_slurm_extra(job) + + +class TestSlurmJobIdValidation: + def test_complex_multiline_output_debug(self): + """Debug: Print regex matches for complex multiline SLURM output.""" + output = """ +╔══════════════════════════════════════════════════════════════════════════════╗ +║ SLURM Job Submission ║ +╚══════════════════════════════════════════════════════════════════════════════╝ + +Cluster Information: + - Queue utilization: 67.8% + - Available memory: 512 GiB + - Free storage: 2.5 TiB (1500 files pending) + +Job Configuration: + - Requested memory: 64 GiB + - Requested time: 48.5 hours + - Cores: 16 + +Submitting job 202411170001 to cluster... + +Status: + - Queue position: 23 + - Estimated start: 15.3 minutes +""" + import re + # Use the same regex as in validate_or_get_slurm_job_id + regex = r"(? Date: Mon, 17 Nov 2025 14:19:43 +0100 Subject: [PATCH 15/17] fix: corrected test cases --- tests/tests.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/tests/tests.py b/tests/tests.py index 11de6bf2..5b0ce12d 100644 --- a/tests/tests.py +++ b/tests/tests.py @@ -869,35 +869,6 @@ def test_multiple_forbidden_options(self, mock_job): class TestSlurmJobIdValidation: - def test_complex_multiline_output_debug(self): - """Debug: Print regex matches for complex multiline SLURM output.""" - output = """ -╔══════════════════════════════════════════════════════════════════════════════╗ -║ SLURM Job Submission ║ -╚══════════════════════════════════════════════════════════════════════════════╝ - -Cluster Information: - - Queue utilization: 67.8% - - Available memory: 512 GiB - - Free storage: 2.5 TiB (1500 files pending) - -Job Configuration: - - Requested memory: 64 GiB - - Requested time: 48.5 hours - - Cores: 16 - -Submitting job 202411170001 to cluster... - -Status: - - Queue position: 23 - - Estimated start: 15.3 minutes -""" - import re - # Use the same regex as in validate_or_get_slurm_job_id - regex = r"(? Date: Mon, 17 Nov 2025 13:22:04 +0000 Subject: [PATCH 16/17] Update snakemake_executor_plugin_slurm/validation.py Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com> --- snakemake_executor_plugin_slurm/validation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 7f689a33..04accdca 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -43,7 +43,8 @@ def validate_or_get_slurm_job_id(job_id, output): # Use negative lookbehind to exclude digits after a dot, and negative lookahead # to exclude digits before a dot or followed by units/percent matches = re.findall( - r"(? Date: Thu, 27 Nov 2025 12:30:12 +0100 Subject: [PATCH 17/17] feat: updated verbose message, if no jobid can be found --- snakemake_executor_plugin_slurm/validation.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py index 04accdca..f3829a60 100644 --- a/snakemake_executor_plugin_slurm/validation.py +++ b/snakemake_executor_plugin_slurm/validation.py @@ -57,7 +57,10 @@ def validate_or_get_slurm_job_id(job_id, output): elif not matches: raise WorkflowError( f"No valid SLURM job ID found in: {output}. " - "Was looking for exactly one positive integer." + "Was looking for exactly one positive integer. " + "We tried our best to parse the sbatch output, but it appears " + "too convoluted. Please run 'sbatch' manually and report the output " + "to us. If the output is garbled, inform your cluster administrators." )