-
Notifications
You must be signed in to change notification settings - Fork 223
[WIP] Command to generate pipeline container config files #3955
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: dev
Are you sure you want to change the base?
Changes from all commits
3e0821a
f3bd12c
682f140
54acc5a
b8004aa
015daa5
b8cf374
0954ee6
f5502c9
7d2586d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,133 @@ | ||||||
| import logging | ||||||
| import re | ||||||
| from pathlib import Path | ||||||
|
|
||||||
| import yaml | ||||||
|
|
||||||
| from nf_core.utils import NF_INSPECT_MIN_NF_VERSION, check_nextflow_version, pretty_nf_version, run_cmd | ||||||
|
|
||||||
| log = logging.getLogger(__name__) | ||||||
|
|
||||||
|
|
||||||
| class ContainerConfigs: | ||||||
| """Generates the container configuration files for a pipeline. | ||||||
| Args: | ||||||
| workflow_directory (str | Path): The directory containing the workflow files. | ||||||
| org (str): Organisation path. | ||||||
| """ | ||||||
|
|
||||||
| def __init__( | ||||||
| self, | ||||||
| workflow_directory: str | Path = ".", | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. let's make this simpler |
||||||
| org: str = "nf-core", | ||||||
| ): | ||||||
| self.workflow_directory = Path(workflow_directory) | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
|
||||||
| self.org: str = org | ||||||
|
|
||||||
| def generate_container_configs(self) -> None: | ||||||
| """Generate the container configuration files for a pipeline.""" | ||||||
| self.check_nextflow_version_sufficient() | ||||||
| default_config = self.generate_default_container_config() | ||||||
| self.generate_all_container_configs(default_config) | ||||||
|
|
||||||
| def check_nextflow_version_sufficient(self) -> None: | ||||||
| """Check if the Nextflow version is sufficient to run `nextflow inspect`.""" | ||||||
| if not check_nextflow_version(NF_INSPECT_MIN_NF_VERSION): | ||||||
| raise UserWarning( | ||||||
| f"To use Seqera containers Nextflow version >= {pretty_nf_version(NF_INSPECT_MIN_NF_VERSION)} is required.\n" | ||||||
| f"Please update your Nextflow version with [magenta]'nextflow self-update'[/]\n" | ||||||
| ) | ||||||
|
|
||||||
| def generate_default_container_config(self) -> str: | ||||||
| """ | ||||||
| Generate the default container configuration file for a pipeline. | ||||||
| Requires Nextflow >= 25.04.4 | ||||||
| """ | ||||||
| log.debug("Generating container config file with [magenta bold]nextflow inspect[/].") | ||||||
| try: | ||||||
| # Run nextflow inspect | ||||||
| executable = "nextflow" | ||||||
| cmd_params = f"inspect -format config {self.workflow_directory}" | ||||||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why not
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. so instead of writing the default out first and extracting all info from there, i would hold the config info in a dict and write it out to all configs from there. |
||||||
| cmd_out = run_cmd(executable, cmd_params) | ||||||
| if cmd_out is None: | ||||||
| raise UserWarning("Failed to run `nextflow inspect`. Please check your Nextflow installation.") | ||||||
|
Comment on lines
+48
to
+54
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I have a feeling we have this multiple times in our code base. should this be a utils function? |
||||||
|
|
||||||
| out, _ = cmd_out | ||||||
| out_str = str(out, encoding="utf-8") | ||||||
| with open(self.workflow_directory / "conf" / "containers_docker_amd64.config", "w") as fh: | ||||||
| fh.write(out_str) | ||||||
| log.info( | ||||||
| f"Generated container config file for Docker AMD64: {self.workflow_directory / 'conf' / 'containers_docker_amd64.config'}" | ||||||
| ) | ||||||
| return out_str | ||||||
|
|
||||||
| except RuntimeError as e: | ||||||
| log.error("Running 'nextflow inspect' failed with the following error:") | ||||||
| raise UserWarning(e) | ||||||
|
|
||||||
| def generate_all_container_configs(self, default_config: str) -> None: | ||||||
| """Generate the container configuration files for all platforms.""" | ||||||
| containers: dict[str, dict[str, str]] = { | ||||||
| "docker_amd64": {}, | ||||||
| "docker_arm64": {}, | ||||||
| "singularity_oras_amd64": {}, | ||||||
| "singularity_oras_arm64": {}, | ||||||
| "singularity_https_amd64": {}, | ||||||
| "singularity_https_arm64": {}, | ||||||
| "conda_amd64_lockfile": {}, | ||||||
| "conda_arm64_lockfile": {}, | ||||||
| } | ||||||
| for line in default_config.split("\n"): | ||||||
| if line.startswith("process"): | ||||||
| pattern = r"process { withName: \'(.*)\' { container = \'(.*)\' } }" | ||||||
| match = re.search(pattern, line) | ||||||
| if match: | ||||||
| try: | ||||||
| module_name = match.group(1) | ||||||
| container = match.group(2) | ||||||
| except AttributeError: | ||||||
| log.warning(f"Could not parse container for process {line}") | ||||||
| continue | ||||||
| else: | ||||||
| continue | ||||||
| containers["docker_amd64"][module_name] = container | ||||||
| for module_name in containers["docker_amd64"].keys(): | ||||||
| # Find module containers in meta.yml | ||||||
| if "_" in module_name: | ||||||
| module_path = Path(module_name.split("_")[0].lower()) / module_name.split("_")[1].lower() | ||||||
| else: | ||||||
| module_path = Path(module_name.lower()) | ||||||
|
|
||||||
| try: | ||||||
| with open(self.workflow_directory / "modules" / self.org / module_path / "meta.yml") as fh: | ||||||
| meta = yaml.safe_load(fh) | ||||||
| except FileNotFoundError: | ||||||
| log.warning(f"Could not find meta.yml for {module_name}") | ||||||
| continue | ||||||
|
|
||||||
| platforms: dict[str, list[str]] = { | ||||||
| "docker_amd64": ["docker", "linux_amd64", "name"], | ||||||
| "docker_arm64": ["docker", "linux_arm64", "name"], | ||||||
mirpedrol marked this conversation as resolved.
Show resolved
Hide resolved
|
||||||
| "singularity_oras_amd64": ["singularity", "linux_amd64", "name"], | ||||||
| "singularity_oras_arm64": ["singularity", "linux_arm64", "name"], | ||||||
| "singularity_https_amd64": ["singularity", "linux_amd64", "https"], | ||||||
| "singularity_https_arm64": ["singularity", "linux_arm64", "https"], | ||||||
| "conda_amd64_lockfile": ["conda", "linux_amd64", "lock_file"], | ||||||
| "conda_arm64_lockfile": ["conda", "linux_arm64", "lock_file"], | ||||||
| } | ||||||
|
|
||||||
| for p_name, (runtime, arch, protocol) in platforms.items(): | ||||||
| try: | ||||||
| containers[p_name][module_name] = meta["containers"][runtime][arch][protocol] | ||||||
| except KeyError: | ||||||
| log.warning(f"Could not find {p_name} container for {module_name}") | ||||||
| continue | ||||||
|
|
||||||
| # write config files | ||||||
| for platform in containers.keys(): | ||||||
| with open(self.workflow_directory / "conf" / f"containers_{platform}.config", "w") as fh: | ||||||
| for module_name in containers[platform].keys(): | ||||||
| fh.write( | ||||||
| f"process {{ withName: '{module_name}' {{ container = '{containers[platform][module_name]}' }} }}\n" | ||||||
| ) | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,159 @@ | ||
| """Tests for the ContainerConfigs helper used by pipelines.""" | ||
|
|
||
| from pathlib import Path | ||
| from unittest.mock import patch | ||
|
|
||
| import pytest | ||
| import yaml | ||
|
|
||
| from nf_core.pipelines.containers_utils import ContainerConfigs | ||
| from nf_core.utils import NF_INSPECT_MIN_NF_VERSION, pretty_nf_version | ||
|
|
||
| from ..test_pipelines import TestPipelines | ||
|
|
||
|
|
||
class TestContainerConfigs(TestPipelines):
    """Tests for ContainerConfigs using a test pipeline."""

    def setUp(self) -> None:
        super().setUp()
        self.container_configs = ContainerConfigs(self.pipeline_dir, "nf-core")

    def test_check_nextflow_version_sufficient_ok(self) -> None:
        """check_nextflow_version should return silently when version is sufficient."""
        with patch(
            "nf_core.pipelines.containers_utils.check_nextflow_version",
            return_value=True,
        ) as mocked_check:
            self.container_configs.check_nextflow_version_sufficient()

        mocked_check.assert_called_once_with(NF_INSPECT_MIN_NF_VERSION)

    def test_check_nextflow_version_sufficient_too_low(self) -> None:
        """check_nextflow_version should raise UserWarning when version is too low."""
        with patch(
            "nf_core.pipelines.containers_utils.check_nextflow_version",
            return_value=False,
        ):
            with pytest.raises(UserWarning) as excinfo:
                self.container_configs.check_nextflow_version_sufficient()

        # Error message should mention the minimal required version
        assert pretty_nf_version(NF_INSPECT_MIN_NF_VERSION) in str(excinfo.value)

    def test_generate_default_container_config(self) -> None:
        """Run generate_default_container_config with mocking."""
        mock_config_bytes = b"process { withName: 'FOO_BAR' { container = 'docker://foo/bar:amd64' } }\n"

        with patch(
            "nf_core.pipelines.containers_utils.run_cmd",
            return_value=(mock_config_bytes, b""),
        ) as mocked_run_cmd:
            out = self.container_configs.generate_default_container_config()

        expected_cmd_params = f"inspect -format config {self.pipeline_dir}"
        mocked_run_cmd.assert_called_once_with("nextflow", expected_cmd_params)

        # The mocked inspect output should be written verbatim and returned.
        conf_path = self.pipeline_dir / "conf" / "containers_docker_amd64.config"
        assert conf_path.exists()
        conf_path_content = conf_path.read_text(encoding="utf-8")
        assert conf_path_content == mock_config_bytes.decode("utf-8")
        assert out == conf_path_content

    def test_generate_default_container_config_in_pipeline(self) -> None:
        """Run generate_default_container_config in a pipeline."""
        out = self.container_configs.generate_default_container_config()
        conf_path = self.pipeline_dir / "conf" / "containers_docker_amd64.config"
        assert conf_path.exists()
        conf_path_content = conf_path.read_text(encoding="utf-8")
        # FASTQC and MULTIQC should be present in the config file
        # Don't check for the exact version
        assert "process { withName: 'FASTQC' { container = 'quay.io/biocontainers/fastqc" in conf_path_content
        assert "process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc" in out

    def test_generate_all_container_configs(self) -> None:
        """Run generate_all_container_configs in a pipeline."""
        # Mock generate_default_container_config() output
        default_config = (
            "process { withName: 'FASTQC' { container = 'quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0' } }\n"
            "process { withName: 'MULTIQC' { container = 'community.wave.seqera.io/library/multiqc:1.32--d58f60e4deb769bf' } }\n"
        )

        # TODO: Test with real meta.yml files once they are available in the template
        # Inject a "containers" section into the fastqc module's meta.yml
        fastqc_dir = self.pipeline_dir / "modules" / "nf-core" / "fastqc"
        meta = {
            "containers": {
                "docker": {
                    "linux_amd64": {
                        "name": "quay.io/biocontainers/fastqc:0.12.1--hdfd78af_0",
                    },
                    "linux_arm64": {
                        "name": "community.wave.seqera.io/library/fastqc:0.12.1--d3caca66b4f3d3b0",
                    },
                },
                "singularity": {
                    "linux_amd64": {
                        "name": "oras://community.wave.seqera.io/library/fastqc:0.12.1--0827550dd72a3745",
                        "https": "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b280a35770a70ed67008c1d6b6db118409bc3adbb3a98edcd55991189e5116f6/data",
                    },
                    "linux_arm64": {
                        "name": "oras://community.wave.seqera.io/library/fastqc:0.12.1--b2ccdee5305e5859",
                        "https": "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/76/76e744b425a6b4c7eb8f12e03fa15daf7054de36557d2f0c4eb53ad952f9b0e3/data",
                    },
                },
                "conda": {
                    "linux_amd64": {
                        "lock_file": "https://wave.seqera.io/v1alpha1/builds/5cfd0f3cb6760c42_1/condalock",
                    },
                    "linux_arm64": {
                        "lock_file": "https://wave.seqera.io/v1alpha1/builds/d3caca66b4f3d3b0_1/condalock",
                    },
                },
            },
        }
        with (fastqc_dir / "meta.yml").open("r") as fh:
            current_meta = yaml.safe_load(fh)
        current_meta.update(meta)
        with (fastqc_dir / "meta.yml").open("w") as fh:
            yaml.safe_dump(current_meta, fh)

        self.container_configs.generate_all_container_configs(default_config)

        conf_dir = self.pipeline_dir / "conf"
        # Expected platforms and one expected container each
        expected_platforms = {
            "docker_arm64": {
                "FASTQC": "community.wave.seqera.io/library/fastqc:0.12.1--d3caca66b4f3d3b0",
            },
            "singularity_oras_amd64": {
                "FASTQC": "oras://community.wave.seqera.io/library/fastqc:0.12.1--0827550dd72a3745",
            },
            "singularity_oras_arm64": {
                "FASTQC": "oras://community.wave.seqera.io/library/fastqc:0.12.1--b2ccdee5305e5859",
            },
            "singularity_https_amd64": {
                "FASTQC": "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/b2/b280a35770a70ed67008c1d6b6db118409bc3adbb3a98edcd55991189e5116f6/data",
            },
            "singularity_https_arm64": {
                "FASTQC": "https://community-cr-prod.seqera.io/docker/registry/v2/blobs/sha256/76/76e744b425a6b4c7eb8f12e03fa15daf7054de36557d2f0c4eb53ad952f9b0e3/data",
            },
            "conda_amd64_lockfile": {
                "FASTQC": "https://wave.seqera.io/v1alpha1/builds/5cfd0f3cb6760c42_1/condalock",
            },
            "conda_arm64_lockfile": {
                "FASTQC": "https://wave.seqera.io/v1alpha1/builds/d3caca66b4f3d3b0_1/condalock",
            },
        }

        for platform, expected in expected_platforms.items():
            cfg_path = conf_dir / f"containers_{platform}.config"
            assert cfg_path.exists()
            content = cfg_path.read_text(encoding="utf-8")
            assert f"process {{ withName: 'FASTQC' {{ container = '{expected['FASTQC']}' }} }}\n" in content
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
let's make this simpler