From 3995bdf081c4f5191e91c63c8b2b471754de6833 Mon Sep 17 00:00:00 2001
From: "Michael R. Crusoe" <mrc@commonwl.org>
Date: Sun, 2 Mar 2025 15:02:19 +0100
Subject: [PATCH 1/2] tox: allow passing multiple extra pytest arguments

Example: `tox -e py311-unit -- --lf --pdb`
---
 tox.ini | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tox.ini b/tox.ini
index c5740a845..dc04e2c52 100644
--- a/tox.ini
+++ b/tox.ini
@@ -66,10 +66,10 @@ commands_pre =
   py312-lintreadme: python -m build --outdir {distdir}
 
 commands =
-  py3{9,10,11,12,13}-unit: make coverage-report coverage.xml PYTEST_EXTRA={posargs}
+  py3{9,10,11,12,13}-unit: make coverage-report coverage.xml PYTEST_EXTRA="{posargs}"
   py3{9,10,11,12,13}-bandit: bandit -r cwltool
   py3{9,10,11,12,13}-lint: make flake8 format-check codespell-check
-  py3{9,10,11,12,13}-mypy: make mypy mypyc PYTEST_EXTRA={posargs}
+  py3{9,10,11,12,13}-mypy: make mypy mypyc PYTEST_EXTRA="{posargs}"
   py312-shellcheck: make shellcheck
   py312-pydocstyle: make diff_pydocstyle_report
   py312-lintreadme: twine check {distdir}/*

From 0a2475c347cc04ccb8b87031cfcf301db28a5697 Mon Sep 17 00:00:00 2001
From: stxue1 <stxue@ucsc.edu>
Date: Fri, 7 Feb 2025 11:51:46 -0800
Subject: [PATCH 2/2] caching: enhance consideration of `EnvVarRequirement`s

Especially with regards to overrides and
`--preserve{-entire,}-environement`

Added more tests for environment variable caching

Co-authored-by: Michael R. Crusoe <mrc@commonwl.org>
---
 cwltool/command_line_tool.py     |  8 ++++++
 cwltool/job.py                   | 38 +++++++++++++++++---------
 tests/cache_environment.yml      |  5 ++++
 tests/cache_environment2.yml     |  5 ++++
 tests/cache_environment_tool.cwl |  8 ++++++
 tests/test_environment.py        | 28 +++++++++++++++++++
 tests/test_examples.py           | 47 ++++++++++++++++++++++++++++++++
 7 files changed, 126 insertions(+), 13 deletions(-)
 create mode 100644 tests/cache_environment.yml
 create mode 100644 tests/cache_environment2.yml
 create mode 100644 tests/cache_environment_tool.cwl

diff --git a/cwltool/command_line_tool.py b/cwltool/command_line_tool.py
index cbff45bd0..1ad19a70a 100644
--- a/cwltool/command_line_tool.py
+++ b/cwltool/command_line_tool.py
@@ -876,6 +876,14 @@ def remove_prefix(s: str, prefix: str) -> str:
                     if cls in interesting and cls not in keydict:
                         keydict[cls] = r
 
+            # If there are environmental variables to preserve, add it to the key
+            env_var_requirement = cast(dict[str, str], keydict.get("EnvVarRequirement", {}))
+            env_def = cast(CWLObjectType, self.get_requirement("EnvVarRequirement")[0] or {})
+            if runtimeContext.preserve_environment is not None:
+                env_def.update(JobBase.extract_environment(runtimeContext, env_var_requirement))
+
+            if env_def:
+                keydict["EnvVarRequirement"] = env_def
             keydictstr = json_dumps(keydict, separators=(",", ":"), sort_keys=True)
             cachekey = hashlib.md5(keydictstr.encode("utf-8")).hexdigest()  # nosec
 
diff --git a/cwltool/job.py b/cwltool/job.py
index 2c6bb9f77..075e27ef6 100644
--- a/cwltool/job.py
+++ b/cwltool/job.py
@@ -469,6 +469,29 @@ def _preserve_environment_on_containers_warning(
         # By default, don't do anything; ContainerCommandLineJob below
         # will issue a warning.
 
+    @staticmethod
+    def extract_environment(
+        runtimeContext: RuntimeContext, envVarReq: Mapping[str, str]
+    ) -> Mapping[str, str]:
+        """Extract environment variables that should be preserved."""
+        # Start empty
+        env: dict[str, str] = {}
+        # Preserve any env vars
+        if runtimeContext.preserve_entire_environment:
+            env.update(os.environ)
+        elif runtimeContext.preserve_environment:
+            for key in runtimeContext.preserve_environment:
+                try:
+                    env[key] = os.environ[key]
+                except KeyError:
+                    _logger.warning(
+                        f"Attempting to preserve environment variable {key!r} which is not present"
+                    )
+        # Apply EnvVarRequirement
+        env.update(envVarReq)
+
+        return env
+
     def prepare_environment(
         self, runtimeContext: RuntimeContext, envVarReq: Mapping[str, str]
     ) -> None:
@@ -481,27 +504,16 @@ def prepare_environment(
         """
         # Start empty
         env: dict[str, str] = {}
-
-        # Preserve any env vars
         if runtimeContext.preserve_entire_environment:
             self._preserve_environment_on_containers_warning()
-            env.update(os.environ)
         elif runtimeContext.preserve_environment:
             self._preserve_environment_on_containers_warning(runtimeContext.preserve_environment)
-            for key in runtimeContext.preserve_environment:
-                try:
-                    env[key] = os.environ[key]
-                except KeyError:
-                    _logger.warning(
-                        f"Attempting to preserve environment variable {key!r} which is not present"
-                    )
+
+        env.update(self.extract_environment(runtimeContext, envVarReq))
 
         # Set required env vars
         env.update(self._required_env())
 
-        # Apply EnvVarRequirement
-        env.update(envVarReq)
-
         # Set on ourselves
         self.environment = env
 
diff --git a/tests/cache_environment.yml b/tests/cache_environment.yml
new file mode 100644
index 000000000..97cfe97a2
--- /dev/null
+++ b/tests/cache_environment.yml
@@ -0,0 +1,5 @@
+cwl:requirements:
+ - class: EnvVarRequirement
+   envDef:
+     - envName: TEST_ENV
+       envValue: value1
diff --git a/tests/cache_environment2.yml b/tests/cache_environment2.yml
new file mode 100644
index 000000000..95f3d656b
--- /dev/null
+++ b/tests/cache_environment2.yml
@@ -0,0 +1,5 @@
+cwl:requirements:
+ - class: EnvVarRequirement
+   envDef:
+     - envName: TEST_ENV
+       envValue: value2
diff --git a/tests/cache_environment_tool.cwl b/tests/cache_environment_tool.cwl
new file mode 100644
index 000000000..fd1b80552
--- /dev/null
+++ b/tests/cache_environment_tool.cwl
@@ -0,0 +1,8 @@
+class: CommandLineTool
+cwlVersion: v1.2
+inputs:
+  []
+outputs:
+  []
+
+baseCommand: [bash, "-c", "echo $TEST_ENV"]
diff --git a/tests/test_environment.py b/tests/test_environment.py
index fd2a160ec..245d75d60 100644
--- a/tests/test_environment.py
+++ b/tests/test_environment.py
@@ -242,6 +242,34 @@ def test_preserve_single(
     assert_env_matches(checks, env)
 
 
+@CRT_PARAMS
+def test_preserve_single_missing(
+    crt_params: CheckHolder,
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+    caplog: pytest.LogCaptureFixture,
+) -> None:
+    """Test that attempting to preserve an unset env var produces a warning."""
+    tmp_prefix = str(tmp_path / "canary")
+    args = crt_params.flags + [
+        f"--tmpdir-prefix={tmp_prefix}",
+        "--preserve-environment=RANDOMVAR",
+    ]
+    env = get_tool_env(
+        tmp_path,
+        args,
+        monkeypatch=monkeypatch,
+        runtime_env_accepts_null=crt_params.env_accepts_null,
+    )
+    checks = crt_params.checks(tmp_prefix)
+    assert "RANDOMVAR" not in checks
+    assert (
+        "cwltool:job.py:487 Attempting to preserve environment variable "
+        "'RANDOMVAR' which is not present"
+    ) in caplog.text
+    assert_env_matches(checks, env)
+
+
 @CRT_PARAMS
 def test_preserve_all(
     crt_params: CheckHolder, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
diff --git a/tests/test_examples.py b/tests/test_examples.py
index 0fe7f471c..c0b34d74f 100644
--- a/tests/test_examples.py
+++ b/tests/test_examples.py
@@ -1233,6 +1233,53 @@ def test_secondary_files_v1_0(tmp_path: Path, factor: str) -> None:
     assert error_code == 0
 
 
+@pytest.mark.parametrize("factor", test_factors)
+def test_cache_environment_variable(tmp_path: Path, factor: str) -> None:
+    """Ensure that changing the environment variables will result in different cache keys"""
+    test_file = "cache_environment_tool.cwl"
+    test_job_file = "cache_environment.yml"
+    cache_dir = str(tmp_path / "cwltool_cache")
+    commands = factor.split()
+    commands.extend(
+        [
+            "--cachedir",
+            cache_dir,
+            "--outdir",
+            str(tmp_path / "outdir"),
+            get_data("tests/" + test_file),
+            get_data(f"tests/{test_job_file}"),
+        ]
+    )
+    error_code, _, stderr = get_main_output(commands)
+
+    stderr = re.sub(r"\s\s+", " ", stderr)
+    assert "Output of job will be cached in" in stderr
+    assert error_code == 0, stderr
+
+    error_code, _, stderr = get_main_output(commands)
+    assert "Output of job will be cached in" not in stderr
+    assert error_code == 0, stderr
+
+    commands = factor.split()
+    test_job_file = "cache_environment2.yml"
+    commands.extend(
+        [
+            "--cachedir",
+            cache_dir,
+            "--outdir",
+            str(tmp_path / "outdir"),
+            get_data("tests/" + test_file),
+            get_data(f"tests/{test_job_file}"),
+        ]
+    )
+
+    error_code, _, stderr = get_main_output(commands)
+
+    stderr = re.sub(r"\s\s+", " ", stderr)
+    assert "Output of job will be cached in" in stderr
+    assert error_code == 0, stderr
+
+
 def test_write_summary(tmp_path: Path) -> None:
     """Test --write-summary."""
     commands = [