From fe6a8126a6a3b5b7d749856304258c42db10719a Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Fri, 14 Nov 2025 12:12:46 +0100
Subject: [PATCH 01/17] fix: extracting job id from convoluted output, if
 necessary

---
 snakemake_executor_plugin_slurm/__init__.py   |  6 ++--
 snakemake_executor_plugin_slurm/validation.py | 34 +++++++++++++++++++
 2 files changed, 38 insertions(+), 2 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py
index 583b0773..4d48e06e 100644
--- a/snakemake_executor_plugin_slurm/__init__.py
+++ b/snakemake_executor_plugin_slurm/__init__.py
@@ -35,7 +35,7 @@
 )
 from .efficiency_report import create_efficiency_report
 from .submit_string import get_submit_command
-from .validation import validate_slurm_extra
+from .validation import validate_slurm_job_id, validate_slurm_extra
 
 
 @dataclass
@@ -382,8 +382,10 @@ def run_job(self, job: JobExecutorInterface):
         # To extract the job id we split by semicolon and take the first element
         # (this also works if no cluster name was provided)
         slurm_jobid = out.strip().split(";")[0]
+        # this slurm_jobid might be wrong: some cluster admin give convulted
+        # sbatch outputs. So we need to validate it properly:
         if not slurm_jobid:
-            raise WorkflowError("Failed to retrieve SLURM job ID from sbatch output.")
+            slurm_jobid = validate_slurm_job_id(slurm_jobidout)
         slurm_logfile = slurm_logfile.with_name(
             slurm_logfile.name.replace("%j", slurm_jobid)
         )
diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 626a90a4..c2e6a007 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -3,9 +3,43 @@
 """
 
 import re
+import subprocess
 from snakemake_interface_common.exceptions import WorkflowError
 
 
+def validate_slurm_job_id(job_id, output):
+    """
+    Validate that the SLURM job ID is a positive integer.
+
+    Args:
+        job_id (str): The SLURM job ID to validate.
+
+    Raises:
+        WorkflowError: If the job ID is not a positive integer or we cannot
+                       determine a valid job ID from the given input string.
+    """
+    if re.match(r"^\d+$", job_id):
+        return job_id
+    else:
+        # try matching a positive integer, raise an error if more than one match or no match found
+        # Match standalone integers, excluding those followed by %, letters, or digits (units/percentages/floats)
+        # Allows format: "1234" or "1234; clustername" (SLURM multi-cluster format)
+
+        # If the first attempt to validate the job fails, try parsing the sbatch output
+        # a bit more sophisticatedly:
+        matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output)
+        if len(matches) == 1:
+            return matches[0]
+        elif len(matches) > 1:
+            raise WorkflowError(
+                f"Multiple possible SLURM job IDs found in: {output}. Was looking for exactly one positive integer."
+            )
+        elif not matches:
+            raise WorkflowError(
+                f"No valid SLURM job ID found in: {output}. Was looking for exactly one positive integer."
+            )
+
+
 def get_forbidden_slurm_options():
     """
     Return a dictionary of forbidden SLURM options that the executor manages.

From 44249fe87d838dc8399f5016b1b55dbfe549e305 Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Fri, 14 Nov 2025 12:15:10 +0100
Subject: [PATCH 02/17] fix: syntax typo

---
 snakemake_executor_plugin_slurm/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py
index 4d48e06e..cbd4e847 100644
--- a/snakemake_executor_plugin_slurm/__init__.py
+++ b/snakemake_executor_plugin_slurm/__init__.py
@@ -385,7 +385,7 @@ def run_job(self, job: JobExecutorInterface):
         # this slurm_jobid might be wrong: some cluster admin give convulted
         # sbatch outputs. So we need to validate it properly:
         if not slurm_jobid:
-            slurm_jobid = validate_slurm_job_id(slurm_jobidout)
+            slurm_jobid = validate_slurm_job_id(slurm_jobid, out)
         slurm_logfile = slurm_logfile.with_name(
             slurm_logfile.name.replace("%j", slurm_jobid)
         )

From 7b66597e588464dac49c7472d88392222df3333c Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Fri, 14 Nov 2025 13:45:34 +0100
Subject: [PATCH 03/17] fix: removed sunused import

---
 snakemake_executor_plugin_slurm/validation.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index c2e6a007..c6a79267 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -3,7 +3,6 @@
 """
 
 import re
-import subprocess
 from snakemake_interface_common.exceptions import WorkflowError
 
 

From 44a55071cf6ff527991b844f52a87e5d068dba31 Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Fri, 14 Nov 2025 13:54:26 +0100
Subject: [PATCH 04/17] feat: included black configuration!

---
 pyproject.toml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/pyproject.toml b/pyproject.toml
index 9d6d90c0..f95bcd21 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,6 +33,9 @@ pandas = "^2.2.3"
 [tool.coverage.run]
 omit = [".*", "*/site-packages/*", "Snakefile"]
 
+[tool.black]
+line-length = 88
+
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

From b9d38fbd3ff87fa2200e38ea2db6b7fb9e52fb93 Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Fri, 14 Nov 2025 13:54:46 +0100
Subject: [PATCH 05/17] fix: line length (formatting)!

---
 snakemake_executor_plugin_slurm/validation.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index c6a79267..52957257 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -20,22 +20,27 @@ def validate_slurm_job_id(job_id, output):
     if re.match(r"^\d+$", job_id):
         return job_id
     else:
-        # try matching a positive integer, raise an error if more than one match or no match found
-        # Match standalone integers, excluding those followed by %, letters, or digits (units/percentages/floats)
-        # Allows format: "1234" or "1234; clustername" (SLURM multi-cluster format)
+        # Try matching a positive integer, raise an error if more than one match or
+        # no match found. Match standalone integers, excluding those followed by %, letters,
+        # or digits (units/percentages/floats). Allows format: "1234" or
+        # "1234; clustername" (SLURM multi-cluster format).
 
         # If the first attempt to validate the job fails, try parsing the sbatch output
         # a bit more sophisticatedly:
-        matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output)
+        matches = re.findall(
+            r"\b\d+(?![%A-Za-z\d.])", output
+        )
         if len(matches) == 1:
             return matches[0]
         elif len(matches) > 1:
             raise WorkflowError(
-                f"Multiple possible SLURM job IDs found in: {output}. Was looking for exactly one positive integer."
+                f"Multiple possible SLURM job IDs found in: {output}. "
+                "Was looking for exactly one positive integer."
             )
         elif not matches:
             raise WorkflowError(
-                f"No valid SLURM job ID found in: {output}. Was looking for exactly one positive integer."
+                f"No valid SLURM job ID found in: {output}. "
+                "Was looking for exactly one positive integer."
             )
 
 

From 0c0c757a7cb269ef7ec623b27570aad4341d08ef Mon Sep 17 00:00:00 2001
From: Christian Meesters <cmeesters@users.noreply.github.com>
Date: Fri, 14 Nov 2025 15:57:59 +0000
Subject: [PATCH 06/17] Update snakemake_executor_plugin_slurm/validation.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 snakemake_executor_plugin_slurm/validation.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 52957257..6ef30fe8 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -12,6 +12,7 @@ def validate_slurm_job_id(job_id, output):
 
     Args:
         job_id (str): The SLURM job ID to validate.
+        output (str): The full sbatch output to parse if job_id is invalid.
 
     Raises:
         WorkflowError: If the job ID is not a positive integer or we cannot

From 40783f88138d3c80724b95fa7116c1b6324b22e5 Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 12:13:57 +0100
Subject: [PATCH 07/17] fix: unconditional JOBID validation and renamed the
 validator function

---
 snakemake_executor_plugin_slurm/__init__.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py
index cbd4e847..617e2a7e 100644
--- a/snakemake_executor_plugin_slurm/__init__.py
+++ b/snakemake_executor_plugin_slurm/__init__.py
@@ -35,7 +35,7 @@
 )
 from .efficiency_report import create_efficiency_report
 from .submit_string import get_submit_command
-from .validation import validate_slurm_job_id, validate_slurm_extra
+from .validation import validate_or_get_slurm_job_id, validate_slurm_extra
 
 
 @dataclass
@@ -383,9 +383,9 @@ def run_job(self, job: JobExecutorInterface):
         # (this also works if no cluster name was provided)
         slurm_jobid = out.strip().split(";")[0]
         # this slurm_jobid might be wrong: some cluster admin give convulted
-        # sbatch outputs. So we need to validate it properly:
-        if not slurm_jobid:
-            slurm_jobid = validate_slurm_job_id(slurm_jobid, out)
+        # sbatch outputs. So we need to validate it properly (and replace it
+        # if necessary).
+        slurm_jobid = validate_or_get_slurm_job_id(slurm_jobid, out)
         slurm_logfile = slurm_logfile.with_name(
             slurm_logfile.name.replace("%j", slurm_jobid)
         )

From 40a2d8215111396ef7dd912207b8fc73dc27fd1a Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 12:14:24 +0100
Subject: [PATCH 08/17] fix: renamed the validator function - added regex
 comments

---
 snakemake_executor_plugin_slurm/validation.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 52957257..2a3e5c37 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -6,7 +6,7 @@
 from snakemake_interface_common.exceptions import WorkflowError
 
 
-def validate_slurm_job_id(job_id, output):
+def validate_or_get_slurm_job_id(job_id, output):
     """
     Validate that the SLURM job ID is a positive integer.
 
@@ -17,6 +17,9 @@ def validate_slurm_job_id(job_id, output):
         WorkflowError: If the job ID is not a positive integer or we cannot
                        determine a valid job ID from the given input string.
     """
+    # this regex just matches a positive integer
+    # strict validation would require to check for a JOBID with either
+    # the SLURM database or control daemon. This is too much overhead.
     if re.match(r"^\d+$", job_id):
         return job_id
     else:
@@ -26,7 +29,10 @@ def validate_slurm_job_id(job_id, output):
         # "1234; clustername" (SLURM multi-cluster format).
 
         # If the first attempt to validate the job fails, try parsing the sbatch output
-        # a bit more sophisticatedly:
+        # a bit more sophisticatedly (
+        # The regex below matches standalone positive integers, there has to be a 
+        # word boundary before the number and the number must not be followed by
+        # a percent sign, letter, digit, or dot):
         matches = re.findall(
             r"\b\d+(?![%A-Za-z\d.])", output
         )

From 8b7fed775686989b0d386755ce852d6d4291538b Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 12:14:59 +0100
Subject: [PATCH 09/17] fix: formatting

---
 snakemake_executor_plugin_slurm/validation.py | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 2a3e5c37..4cadbd9c 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -30,12 +30,10 @@ def validate_or_get_slurm_job_id(job_id, output):
 
         # If the first attempt to validate the job fails, try parsing the sbatch output
         # a bit more sophisticatedly (
-        # The regex below matches standalone positive integers, there has to be a 
+        # The regex below matches standalone positive integers, there has to be a
         # word boundary before the number and the number must not be followed by
         # a percent sign, letter, digit, or dot):
-        matches = re.findall(
-            r"\b\d+(?![%A-Za-z\d.])", output
-        )
+        matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output)
         if len(matches) == 1:
             return matches[0]
         elif len(matches) > 1:

From 73f341d9a03051fb305710d723b72caa46a9ef03 Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 12:21:07 +0100
Subject: [PATCH 10/17] fix: linting1

---
 snakemake_executor_plugin_slurm/validation.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 4754b813..c8a2a367 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -25,8 +25,8 @@ def validate_or_get_slurm_job_id(job_id, output):
         return job_id
     else:
         # Try matching a positive integer, raise an error if more than one match or
-        # no match found. Match standalone integers, excluding those followed by %, letters,
-        # or digits (units/percentages/floats). Allows format: "1234" or
+        # no match found. Match standalone integers, excluding those followed by %, 
+        # letters, or digits (units/percentages/floats). Allows format: "1234" or
         # "1234; clustername" (SLURM multi-cluster format).
 
         # If the first attempt to validate the job fails, try parsing the sbatch output

From f880423985ce03c26f7099cbe8526d41a9dfd099 Mon Sep 17 00:00:00 2001
From: Christian Meesters <cmeesters@users.noreply.github.com>
Date: Mon, 17 Nov 2025 11:21:55 +0000
Subject: [PATCH 11/17] Update snakemake_executor_plugin_slurm/__init__.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 snakemake_executor_plugin_slurm/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snakemake_executor_plugin_slurm/__init__.py b/snakemake_executor_plugin_slurm/__init__.py
index 617e2a7e..e2428002 100644
--- a/snakemake_executor_plugin_slurm/__init__.py
+++ b/snakemake_executor_plugin_slurm/__init__.py
@@ -382,7 +382,7 @@ def run_job(self, job: JobExecutorInterface):
         # To extract the job id we split by semicolon and take the first element
         # (this also works if no cluster name was provided)
         slurm_jobid = out.strip().split(";")[0]
-        # this slurm_jobid might be wrong: some cluster admin give convulted
+        # this slurm_jobid might be wrong: some cluster admin give convoluted
         # sbatch outputs. So we need to validate it properly (and replace it
         # if necessary).
         slurm_jobid = validate_or_get_slurm_job_id(slurm_jobid, out)

From 2ab4acffefa60a34e1bf2beb958272c387d940b1 Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 12:22:47 +0100
Subject: [PATCH 12/17] fix: formatting - a f****** white space

---
 snakemake_executor_plugin_slurm/validation.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index c8a2a367..8e57d33d 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -25,7 +25,7 @@ def validate_or_get_slurm_job_id(job_id, output):
         return job_id
     else:
         # Try matching a positive integer, raise an error if more than one match or
-        # no match found. Match standalone integers, excluding those followed by %, 
+        # no match found. Match standalone integers, excluding those followed by %,
         # letters, or digits (units/percentages/floats). Allows format: "1234" or
         # "1234; clustername" (SLURM multi-cluster format).
 

From 9c21a5d0cf843dcf1cef8605a9bacdb40c0fafed Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 14:14:46 +0100
Subject: [PATCH 13/17] feat: extended the jobid regex to be more stable
 (rejecting matching units)

---
 snakemake_executor_plugin_slurm/validation.py | 21 ++++++++++++++-----
 1 file changed, 16 insertions(+), 5 deletions(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 8e57d33d..7f689a33 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -30,11 +30,22 @@ def validate_or_get_slurm_job_id(job_id, output):
         # "1234; clustername" (SLURM multi-cluster format).
 
         # If the first attempt to validate the job fails, try parsing the sbatch output
-        # a bit more sophisticatedly (
-        # The regex below matches standalone positive integers, there has to be a
-        # word boundary before the number and the number must not be followed by
-        # a percent sign, letter, digit, or dot):
-        matches = re.findall(r"\b\d+(?![%A-Za-z\d.])", output)
+        # a bit more sophisticatedly.
+        # The regex below matches standalone positive integers with a word boundary
+        # before the number. The number must NOT be:
+        # - Part of a decimal number (neither before nor after the dot)
+        # - Followed by a percent sign with optional space (23% or 23 %)
+        # - Followed by units/counts with optional space:
+        #   * Memory units: k, K, m, M, g, G, kiB, KiB, miB, MiB, giB, GiB
+        #   * Resource counts: files, cores, hours, cpus/CPUs (case-insensitive)
+        #   * minutes are excluded, because of the match to 'm' for Megabytes
+        #   Units must be followed by whitespace, hyphen, period, or end of string
+        # Use negative lookbehind to exclude digits after a dot, and negative lookahead
+        # to exclude digits before a dot or followed by units/percent
+        matches = re.findall(
+            r"(?<![.\d])\d+(?![.\d]|\s*%|\s*(?:[kKmMgG](?:iB)?|files|cores|hours|[cC][pP][uU][sS]?)(?:\s|[-.]|$))",
+            output,
+        )
         if len(matches) == 1:
             return matches[0]
         elif len(matches) > 1:

From aaf326416dfdb63361494c724129f0f80699bd5e Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 14:16:05 +0100
Subject: [PATCH 14/17] test: extended test range for this PR

---
 tests/tests.py | 224 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 223 insertions(+), 1 deletion(-)

diff --git a/tests/tests.py b/tests/tests.py
index 6dc47098..11de6bf2 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -15,7 +15,10 @@
 )
 from snakemake_executor_plugin_slurm.utils import set_gres_string
 from snakemake_executor_plugin_slurm.submit_string import get_submit_command
-from snakemake_executor_plugin_slurm.validation import validate_slurm_extra
+from snakemake_executor_plugin_slurm.validation import (
+    validate_slurm_extra,
+    validate_or_get_slurm_job_id,
+)
 from snakemake_interface_common.exceptions import WorkflowError
 import pandas as pd
 
@@ -863,3 +866,222 @@ def test_multiple_forbidden_options(self, mock_job):
         # Should raise error for job-name (first one encountered)
         with pytest.raises(WorkflowError, match=r"job-name.*not allowed"):
             validate_slurm_extra(job)
+
+
+class TestSlurmJobIdValidation:
+        def test_complex_multiline_output_debug(self):
+                """Debug: Print regex matches for complex multiline SLURM output."""
+                output = """
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                            SLURM Job Submission                               ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+Cluster Information:
+    - Queue utilization: 67.8%
+    - Available memory: 512 GiB
+    - Free storage: 2.5 TiB (1500 files pending)
+
+Job Configuration:
+    - Requested memory: 64 GiB
+    - Requested time: 48.5 hours
+    - Cores: 16
+
+Submitting job 202411170001 to cluster...
+
+Status:
+    - Queue position: 23
+    - Estimated start: 15.3 minutes
+"""
+                import re
+                # Use the same regex as in validate_or_get_slurm_job_id
+                regex = r"(?<![\d.])(?<![\d.][kKmMgG]|[kK][iI][bB]|[mM][iI][bB]|[gG][iI][bB]|files|cores|cpus|hours)\b(\d{4,})\b(?!\s*(?:k|K|m|M|g|G|kiB|KiB|miB|MiB|giB|GiB|files|cores|cpus|hours|%))"
+                matches = re.findall(regex, output)
+                print("DEBUG REGEX MATCHES:", matches)
+                # No assert, just print for debug
+    """Test cases for the validate_or_get_slurm_job_id function."""
+
+    def test_parsable_format_simple(self):
+        """Test parsable format with just job ID."""
+        output = "12345"
+        result = validate_or_get_slurm_job_id("12345", output)
+        assert result == "12345"
+
+    def test_parsable_format_with_cluster(self):
+        """Test parsable format with cluster name (jobid;clustername)."""
+        output = "54321;mycluster"
+        result = validate_or_get_slurm_job_id("54321", output)
+        assert result == "54321"
+
+    def test_convoluted_output_with_percentages(self):
+        """Test extraction from output containing percentages."""
+        output = """Lorem ipsum dolor sit amet, consectetur adipiscing elit.
+Maecenas quis risus porttitor: 25%
+pretium enim volutpat: 23.3%
+Submitted batch job 88888
+some other text"""
+        result = validate_or_get_slurm_job_id("88888", output)
+        assert result == "88888"
+
+    def test_convoluted_output_with_units(self):
+        """Test extraction from output containing memory/size units."""
+        output = """System information:
+Memory available: 256 GiB
+CPU usage: 12 cores
+Storage: 500 G
+Job ID: 77777
+Files processed: 1500 files"""
+        result = validate_or_get_slurm_job_id("77777", output)
+        assert result == "77777"
+
+    def test_convoluted_output_mixed(self):
+        """Test extraction with percentages, decimals, and units mixed."""
+        output = """Cluster status report
+Queue utilization: 75.5%
+Memory allocated: 128 KiB per node
+Disk usage: 23.3 % of quota
+Allocated space: 50 G
+Processing 3000 files
+Your job 123456 has been submitted
+"""
+        result = validate_or_get_slurm_job_id("123456", output)
+        assert result == "123456"
+
+    def test_job_id_at_beginning(self):
+        """Test extraction when job ID appears at the start."""
+        output = """999888 submitted successfully
+Memory: 64 GiB
+Nodes: 4"""
+        result = validate_or_get_slurm_job_id("999888", output)
+        assert result == "999888"
+
+    def test_job_id_in_middle(self):
+        """Test extraction when job ID is in the middle of output."""
+        output = """Configuration loaded: 100%
+Job 444555 queued
+Estimated wait time: 5.5 minutes"""
+        result = validate_or_get_slurm_job_id("444555", output)
+        assert result == "444555"
+
+    def test_output_with_lowercase_units(self):
+        """Test that lowercase units are properly excluded."""
+        output = """Memory: 32 m
+Storage: 100 k files
+Job: 333222"""
+        result = validate_or_get_slurm_job_id("333222", output)
+        assert result == "333222"
+
+    def test_output_with_cores_and_cpus(self):
+        """Test that numbers followed by 'cores' or 'cpus' are excluded."""
+        output = """System resources:
+Allocated: 16 cores
+Available CPUs: 32
+Using 8 cpus
+Active cpu: 1
+Job ID: 555666"""
+        result = validate_or_get_slurm_job_id("555666", output)
+        assert result == "555666"
+
+    def test_output_with_mixed_case_units(self):
+        """Test mixed case memory units (MiB, GiB, etc.)."""
+        output = """Allocated: 512 MiB
+Reserved: 2 GiB
+Cache: 128 KiB
+Job ID is 111222"""
+        result = validate_or_get_slurm_job_id("111222", output)
+        assert result == "111222"
+
+    def test_decimal_numbers_excluded(self):
+        """Test that decimal numbers are not matched as job IDs."""
+        output = """Performance: 23.3 MB/s
+Efficiency: 99.9%
+Job: 666777
+Load: 1.5"""
+        result = validate_or_get_slurm_job_id("666777", output)
+        assert result == "666777"
+
+    def test_percentage_with_space(self):
+        """Test percentages with space before % sign."""
+        output = """Completion: 45 %
+Progress: 78.5 %
+Job ID: 555444"""
+        result = validate_or_get_slurm_job_id("555444", output)
+        assert result == "555444"
+
+    def test_units_with_hyphen(self):
+        """Test units followed by hyphen."""
+        output = """Memory: 256M-512M range
+Job: 888999"""
+        result = validate_or_get_slurm_job_id("888999", output)
+        assert result == "888999"
+
+    def test_units_with_period(self):
+        """Test units followed by period."""
+        output = """Allocated 128G. Starting job 777888."""
+        result = validate_or_get_slurm_job_id("777888", output)
+        assert result == "777888"
+
+    def test_multiple_job_ids_error(self):
+        """Test that multiple possible job IDs raise an error."""
+        output = """Previous job: 11111
+New job: 22222
+Both are active"""
+        with pytest.raises(
+            WorkflowError, match=r"Multiple possible SLURM job IDs found"
+        ):
+            validate_or_get_slurm_job_id("invalid", output)
+
+    def test_no_valid_job_id_error(self):
+        """Test that output with no valid job ID raises an error."""
+        output = """Error: 23.3%
+Memory: 128 GiB
+Status: 99.9% complete"""
+        with pytest.raises(WorkflowError, match=r"No valid SLURM job ID found"):
+            validate_or_get_slurm_job_id("invalid", output)
+
+    def test_complex_multiline_output(self):
+        """Test complex realistic SLURM output."""
+        output = """
+╔══════════════════════════════════════════════════════════════════════════════╗
+║                            SLURM Job Submission                               ║
+╚══════════════════════════════════════════════════════════════════════════════╝
+
+Cluster Information:
+  - Queue utilization: 67.8%
+  - Available memory: 512 GiB
+  - Free storage: 2.5 TiB (1500 files pending)
+
+Job Configuration:
+  - Requested memory: 64 GiB
+  - Requested time: 48.5 hours
+  - Cores: 16
+
+Submitting job 202411170001 to cluster...
+
+Status:
+  - Queue position: 23
+  - Estimated start: 15.3 minutes
+"""
+        result = validate_or_get_slurm_job_id("202411170001", output)
+        print(result)
+        assert result == "202411170001"
+
+    def test_invalid_job_id_then_extract_from_output(self):
+        """Test fallback extraction when initial job_id is malformed."""
+        output = "Submitted batch job 12345"
+        # Pass invalid job_id, should extract from output
+        result = validate_or_get_slurm_job_id("invalid_id", output)
+        assert result == "12345"
+
+    def test_job_id_with_trailing_content(self):
+        """Test job ID followed by various trailing content."""
+        output = "Job 123456 submitted to partition main"
+        result = validate_or_get_slurm_job_id("123456", output)
+        assert result == "123456"
+
+    def test_numbers_in_filenames_excluded(self):
+        """Test that numbers in 'files' counts are excluded."""
+        output = """Quota 250 files
+Used 100 files
+Job ID: 999000"""
+        result = validate_or_get_slurm_job_id("999000", output)
+        assert result == "999000"

From 1282594a681afb1b2e014698788f797a691c0aad Mon Sep 17 00:00:00 2001
From: meesters <meesters@uni-mainz.de>
Date: Mon, 17 Nov 2025 14:19:43 +0100
Subject: [PATCH 15/17] fix: corrected test cases

---
 tests/tests.py | 29 -----------------------------
 1 file changed, 29 deletions(-)

diff --git a/tests/tests.py b/tests/tests.py
index 11de6bf2..5b0ce12d 100644
--- a/tests/tests.py
+++ b/tests/tests.py
@@ -869,35 +869,6 @@ def test_multiple_forbidden_options(self, mock_job):
 
 
 class TestSlurmJobIdValidation:
-        def test_complex_multiline_output_debug(self):
-                """Debug: Print regex matches for complex multiline SLURM output."""
-                output = """
-╔══════════════════════════════════════════════════════════════════════════════╗
-║                            SLURM Job Submission                               ║
-╚══════════════════════════════════════════════════════════════════════════════╝
-
-Cluster Information:
-    - Queue utilization: 67.8%
-    - Available memory: 512 GiB
-    - Free storage: 2.5 TiB (1500 files pending)
-
-Job Configuration:
-    - Requested memory: 64 GiB
-    - Requested time: 48.5 hours
-    - Cores: 16
-
-Submitting job 202411170001 to cluster...
-
-Status:
-    - Queue position: 23
-    - Estimated start: 15.3 minutes
-"""
-                import re
-                # Use the same regex as in validate_or_get_slurm_job_id
-                regex = r"(?<![\d.])(?<![\d.][kKmMgG]|[kK][iI][bB]|[mM][iI][bB]|[gG][iI][bB]|files|cores|cpus|hours)\b(\d{4,})\b(?!\s*(?:k|K|m|M|g|G|kiB|KiB|miB|MiB|giB|GiB|files|cores|cpus|hours|%))"
-                matches = re.findall(regex, output)
-                print("DEBUG REGEX MATCHES:", matches)
-                # No assert, just print for debug
     """Test cases for the validate_or_get_slurm_job_id function."""
 
     def test_parsable_format_simple(self):

From a65bfc0692257ec4205145577fc2ae8c95547d58 Mon Sep 17 00:00:00 2001
From: Christian Meesters <cmeesters@users.noreply.github.com>
Date: Mon, 17 Nov 2025 13:22:04 +0000
Subject: [PATCH 16/17] Update snakemake_executor_plugin_slurm/validation.py

Co-authored-by: coderabbitai[bot] <136622811+coderabbitai[bot]@users.noreply.github.com>
---
 snakemake_executor_plugin_slurm/validation.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 7f689a33..04accdca 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -43,7 +43,8 @@ def validate_or_get_slurm_job_id(job_id, output):
         # Use negative lookbehind to exclude digits after a dot, and negative lookahead
         # to exclude digits before a dot or followed by units/percent
         matches = re.findall(
-            r"(?<![.\d])\d+(?![.\d]|\s*%|\s*(?:[kKmMgG](?:iB)?|files|cores|hours|[cC][pP][uU][sS]?)(?:\s|[-.]|$))",
+            r"(?<![.\d])\d+(?![.\d]|\s*%|\s*(?:[kKmMgG](?:iB)?|files|cores|"
+            r"hours|[cC][pP][uU][sS]?)(?:\s|[-.]|$))",
             output,
         )
         if len(matches) == 1:

From 0896c0d91ce47289210152a610dba798baf7b17a Mon Sep 17 00:00:00 2001
From: Christian Meesters <meesters@uni-mainz.de>
Date: Thu, 27 Nov 2025 12:30:12 +0100
Subject: [PATCH 17/17] feat: updated verbose message, if no jobid can be found

---
 snakemake_executor_plugin_slurm/validation.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/snakemake_executor_plugin_slurm/validation.py b/snakemake_executor_plugin_slurm/validation.py
index 04accdca..f3829a60 100644
--- a/snakemake_executor_plugin_slurm/validation.py
+++ b/snakemake_executor_plugin_slurm/validation.py
@@ -57,7 +57,10 @@ def validate_or_get_slurm_job_id(job_id, output):
         elif not matches:
             raise WorkflowError(
                 f"No valid SLURM job ID found in: {output}. "
-                "Was looking for exactly one positive integer."
+                "Was looking for exactly one positive integer. "
+                "We tried our best to parse the sbatch output, but it appears "
+                "too convoluted. Please run 'sbatch' manually and report the output "
+                "to us. If the output is garbled, inform your cluster administrators."
             )