Skip to content

Improve environmental variable caching #2101

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cwltool/command_line_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -876,6 +876,14 @@ def remove_prefix(s: str, prefix: str) -> str:
if cls in interesting and cls not in keydict:
keydict[cls] = r

# If there are environmental variables to preserve, add it to the key
env_var_requirement = cast(dict[str, str], keydict.get("EnvVarRequirement", {}))
env_def = cast(CWLObjectType, self.get_requirement("EnvVarRequirement")[0] or {})
if runtimeContext.preserve_environment is not None:
env_def.update(JobBase.extract_environment(runtimeContext, env_var_requirement))

if env_def:
keydict["EnvVarRequirement"] = env_def
keydictstr = json_dumps(keydict, separators=(",", ":"), sort_keys=True)
cachekey = hashlib.md5(keydictstr.encode("utf-8")).hexdigest() # nosec

Expand Down
38 changes: 25 additions & 13 deletions cwltool/job.py
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,29 @@ def _preserve_environment_on_containers_warning(
# By default, don't do anything; ContainerCommandLineJob below
# will issue a warning.

@staticmethod
def extract_environment(
runtimeContext: RuntimeContext, envVarReq: Mapping[str, str]
) -> Mapping[str, str]:
"""Extract environment variables that should be preserved."""
# Start empty
env: dict[str, str] = {}
# Preserve any env vars
if runtimeContext.preserve_entire_environment:
env.update(os.environ)
elif runtimeContext.preserve_environment:
for key in runtimeContext.preserve_environment:
try:
env[key] = os.environ[key]
except KeyError:
_logger.warning(
f"Attempting to preserve environment variable {key!r} which is not present"
)
# Apply EnvVarRequirement
env.update(envVarReq)

return env

def prepare_environment(
self, runtimeContext: RuntimeContext, envVarReq: Mapping[str, str]
) -> None:
Expand All @@ -481,27 +504,16 @@ def prepare_environment(
"""
# Start empty
env: dict[str, str] = {}

# Preserve any env vars
if runtimeContext.preserve_entire_environment:
self._preserve_environment_on_containers_warning()
env.update(os.environ)
elif runtimeContext.preserve_environment:
self._preserve_environment_on_containers_warning(runtimeContext.preserve_environment)
for key in runtimeContext.preserve_environment:
try:
env[key] = os.environ[key]
except KeyError:
_logger.warning(
f"Attempting to preserve environment variable {key!r} which is not present"
)

env.update(self.extract_environment(runtimeContext, envVarReq))

# Set required env vars
env.update(self._required_env())

# Apply EnvVarRequirement
env.update(envVarReq)

# Set on ourselves
self.environment = env

Expand Down
5 changes: 5 additions & 0 deletions tests/cache_environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cwl:requirements:
- class: EnvVarRequirement
envDef:
- envName: TEST_ENV
envValue: value1
5 changes: 5 additions & 0 deletions tests/cache_environment2.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
cwl:requirements:
- class: EnvVarRequirement
envDef:
- envName: TEST_ENV
envValue: value2
8 changes: 8 additions & 0 deletions tests/cache_environment_tool.cwl
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
class: CommandLineTool
cwlVersion: v1.2
inputs:
[]
outputs:
[]

baseCommand: [bash, "-c", "echo $TEST_ENV"]
28 changes: 28 additions & 0 deletions tests/test_environment.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,34 @@ def test_preserve_single(
assert_env_matches(checks, env)


@CRT_PARAMS
def test_preserve_single_missing(
crt_params: CheckHolder,
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
caplog: pytest.LogCaptureFixture,
) -> None:
"""Test that attempting to preserve an unset env var produces a warning."""
tmp_prefix = str(tmp_path / "canary")
args = crt_params.flags + [
f"--tmpdir-prefix={tmp_prefix}",
"--preserve-environment=RANDOMVAR",
]
env = get_tool_env(
tmp_path,
args,
monkeypatch=monkeypatch,
runtime_env_accepts_null=crt_params.env_accepts_null,
)
checks = crt_params.checks(tmp_prefix)
assert "RANDOMVAR" not in checks
assert (
"cwltool:job.py:487 Attempting to preserve environment variable "
"'RANDOMVAR' which is not present"
) in caplog.text
assert_env_matches(checks, env)


@CRT_PARAMS
def test_preserve_all(
crt_params: CheckHolder, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
Expand Down
47 changes: 47 additions & 0 deletions tests/test_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -1233,6 +1233,53 @@ def test_secondary_files_v1_0(tmp_path: Path, factor: str) -> None:
assert error_code == 0


@pytest.mark.parametrize("factor", test_factors)
def test_cache_environment_variable(tmp_path: Path, factor: str) -> None:
"""Ensure that changing the environment variables will result in different cache keys"""
test_file = "cache_environment_tool.cwl"
test_job_file = "cache_environment.yml"
cache_dir = str(tmp_path / "cwltool_cache")
commands = factor.split()
commands.extend(
[
"--cachedir",
cache_dir,
"--outdir",
str(tmp_path / "outdir"),
get_data("tests/" + test_file),
get_data(f"tests/{test_job_file}"),
]
)
error_code, _, stderr = get_main_output(commands)

stderr = re.sub(r"\s\s+", " ", stderr)
assert "Output of job will be cached in" in stderr
assert error_code == 0, stderr

error_code, _, stderr = get_main_output(commands)
assert "Output of job will be cached in" not in stderr
assert error_code == 0, stderr

commands = factor.split()
test_job_file = "cache_environment2.yml"
commands.extend(
[
"--cachedir",
cache_dir,
"--outdir",
str(tmp_path / "outdir"),
get_data("tests/" + test_file),
get_data(f"tests/{test_job_file}"),
]
)

error_code, _, stderr = get_main_output(commands)

stderr = re.sub(r"\s\s+", " ", stderr)
assert "Output of job will be cached in" in stderr
assert error_code == 0, stderr


def test_write_summary(tmp_path: Path) -> None:
"""Test --write-summary."""
commands = [
Expand Down
4 changes: 2 additions & 2 deletions tox.ini
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@ commands_pre =
py312-lintreadme: python -m build --outdir {distdir}

commands =
py3{9,10,11,12,13}-unit: make coverage-report coverage.xml PYTEST_EXTRA={posargs}
py3{9,10,11,12,13}-unit: make coverage-report coverage.xml PYTEST_EXTRA="{posargs}"
py3{9,10,11,12,13}-bandit: bandit -r cwltool
py3{9,10,11,12,13}-lint: make flake8 format-check codespell-check
py3{9,10,11,12,13}-mypy: make mypy mypyc PYTEST_EXTRA={posargs}
py3{9,10,11,12,13}-mypy: make mypy mypyc PYTEST_EXTRA="{posargs}"
py312-shellcheck: make shellcheck
py312-pydocstyle: make diff_pydocstyle_report
py312-lintreadme: twine check {distdir}/*
Expand Down
Loading