@modules.group("containers")
@click.pass_context
def modules_containers(ctx):
    """Manage module container builds and metadata."""


@modules_containers.command("create")
@click.pass_context
@click.option(
    "-await",
    "--await",
    "await_",
    is_flag=True,
    default=False,
    help="Wait for the container build to finish.",
)
@click.argument(
    "module",
    type=str,
    required=False,
    callback=normalize_case,
    # Placeholder restored: the angle-bracketed names had been stripped, leaving " or ".
    metavar="<tool> or <tool/subtool>",
    shell_complete=autocomplete_modules,
)
def command_modules_containers_create(ctx, await_, module):
    """
    Build docker and singularity container files for linux/arm64 and linux/amd64 with wave from environment.yml and create container config file.
    """
    modules_containers_create(ctx, module, await_)


@modules_containers.command("conda-lock")
@click.pass_context
@click.argument(
    "module",
    type=str,
    required=False,
    callback=normalize_case,
    metavar="<tool> or <tool/subtool>",
    shell_complete=autocomplete_modules,
)
def command_modules_containers_conda_lock(ctx, module):
    """
    Fetch the conda lock file of a module's Docker build (default platform: linux/amd64).
    """
    # No platform is passed, so the wrapped function uses its default,
    # CONTAINER_PLATFORMS[0], which is "linux/amd64" (not arm64).
    modules_containers_conda_lock(ctx, module)


@modules_containers.command("lint")
@click.pass_context
@click.argument(
    "module",
    type=str,
    required=False,
    callback=normalize_case,
    metavar="<tool> or <tool/subtool>",
    shell_complete=autocomplete_modules,
)
def command_modules_containers_lint(ctx, module):
    """
    Confirm that container images for a module exist.
    """
    modules_containers_lint(ctx, module)


@modules_containers.command("list")
@click.pass_context
@click.argument(
    "module",
    type=str,
    required=False,
    callback=normalize_case,
    metavar="<tool> or <tool/subtool>",
    shell_complete=autocomplete_modules,
)
def command_modules_containers_list(ctx, module):
    """
    Print containers defined in a module meta.yml.
    """
    modules_containers_list(ctx, module)
# Errors that are reported as a log message plus exit code 1 instead of a traceback.
# RuntimeError is included because ModuleContainers.request_container raises it
# when the wave call returns nothing or unusable metadata.
_CONTAINER_COMMAND_ERRORS = (UserWarning, LookupError, FileNotFoundError, ValueError, RuntimeError)


def modules_containers_create(ctx, module, await_: bool):
    """
    Build docker and singularity containers for linux/arm64 and linux/amd64 using wave.

    The build results are written to the ``containers`` section of the module's meta.yml.

    Args:
        ctx: Click context (unused here).
        module: Module name, resolved relative to the current directory.
        await_: If True, wait for the container builds to finish.
    """
    from nf_core.modules.containers import ModuleContainers

    try:
        manager = ModuleContainers(module=module, directory=".")
        manager.create(await_)
        manager.update_containers_in_meta()
    except _CONTAINER_COMMAND_ERRORS as e:
        log.error(e)
        sys.exit(1)


def modules_containers_conda_lock(ctx, module, platform=CONTAINER_PLATFORMS[0]):
    """
    Print the conda lock file of a module's Docker build for the given platform.

    Args:
        ctx: Click context (unused here).
        module: Module name, resolved relative to the current directory.
        platform: Build platform; defaults to the first entry of CONTAINER_PLATFORMS.
    """
    from nf_core.modules.containers import ModuleContainers

    try:
        manager = ModuleContainers(module, ".")
        lock_file = manager.get_conda_lock_file(platform)
        stdout.print(lock_file)
    except _CONTAINER_COMMAND_ERRORS as e:
        log.error(e)
        sys.exit(1)


def modules_containers_list(ctx, module):
    """
    Print containers defined in a module meta.yml as a table.

    Args:
        ctx: Click context (unused here).
        module: Module name, resolved relative to the current directory.
    """
    from nf_core.modules.containers import ModuleContainers

    try:
        manager = ModuleContainers(module, ".")
        # The manager is already bound to the module; list_containers takes no module argument.
        containers = manager.list_containers()
        t = rich.table.Table("Container System", "Platform", "Image")
        for cs, p, img in containers:
            t.add_row(cs, p, img)
        stdout.print(t)
    except _CONTAINER_COMMAND_ERRORS as e:
        log.error(e)
        sys.exit(1)


def modules_containers_lint(ctx, module):
    """
    Confirm containers are defined for the module.

    Args:
        ctx: Click context (unused here).
        module: Module name, resolved relative to the current directory.
    """
    from nf_core.modules.containers import ModuleContainers

    try:
        manager = ModuleContainers(module, ".")
        # ModuleContainers has no active `lint` method (it is commented out), so
        # reuse list_containers(): it validates the meta.yml section and raises
        # ValueError when a container system or platform is missing.
        containers = manager.list_containers()
        stdout.print(f"Found {len(containers)} container(s) for {module}.")
    except _CONTAINER_COMMAND_ERRORS as e:
        log.error(e)
        sys.exit(1)
def get_container_from_main_nf(self) -> None:
    """
    Extract the container directive of a module's main.nf into ``self.container``.

    Uses ``nextflow inspect`` when the installed Nextflow satisfies
    NF_INSPECT_MIN_NF_VERSION, otherwise a regex over the file content.
    Components that are not modules get an empty string.
    """
    # Assign up front: the class only annotates `container`, so a component that
    # skips extraction would otherwise have no such attribute at all.
    self.container = ""
    if self.component_type != "module":
        return

    if check_nextflow_version(NF_INSPECT_MIN_NF_VERSION):
        self.container = self._get_container_with_inspect()
    else:
        self.container = self._get_container_with_regex()

    if not self.container:
        log.warning(f"No container was extracted for {self.component_name} from {self.main_nf}")


def _get_container_with_inspect(self):
    """
    Return the container reported by ``nextflow inspect -format json`` for main.nf.

    Falls back to ``_get_container_with_regex`` when the command yields no output,
    the output is not usable JSON, or no container is reported.
    """
    # Resolve the script path BEFORE changing directory: a relative main_nf
    # would no longer be found from inside the temporary directory.
    main_nf = Path(self.main_nf).absolute()

    # Run inside a throwaway directory - presumably so that files Nextflow
    # writes to its working directory do not end up in the user's directory.
    with set_wd_tempdir():
        cmd_out = run_cmd("nextflow", f"inspect -format json {main_nf}")

    if cmd_out is None:
        log.debug("Failed to run `nextflow inspect`")
        log.debug("Falling back to regex method")
        return self._get_container_with_regex()

    out, _ = cmd_out
    try:
        # `or [{}]` also covers an explicitly empty "processes" list.
        processes = json.loads(out).get("processes") or [{}]
        container = processes[0].get("container", None)
    except (ValueError, AttributeError, LookupError, TypeError):
        # Not JSON, or JSON of an unexpected shape.
        container = None

    if container is None:
        log.debug(f"Container for {self.component_name} could not be extracted from the output of nextflow inspect")
        log.debug(f"Output of nextflow inspect: {out}")
        log.debug("Falling back to regex method.")
        return self._get_container_with_regex()

    return container


def _get_container_with_regex(self):
    """
    Return the quoted value of the ``container`` directive in main.nf, or "".

    The surrounding quotes are stripped. The value may span several lines.
    A value that contains its own opening quote character is cut at that
    character (limitation of the simple pattern).
    """
    with open(self.main_nf) as f:
        data = f.read()

    if "container" not in data:
        log.debug(f"Could not find a container directive for {self.component_name} in {self.main_nf}")
        return ""

    # Regex explained:
    # 1. The keyword "container" at the start of a line (after optional indentation),
    #    followed by whitespace. Python's `re` has no variable-width lookbehind,
    #    so the keyword is matched directly.
    # 2. Capturing group 1: the opening quote char " or '
    # 3. Capturing group 2: the directive value, matched lazily; DOTALL lets it span lines
    # 4. Whatever was captured in group 1 (the matching closing quote)
    regex_container = r"^[ \t]*container\s+([\"'])(.+?)\1"
    match = re.search(regex_container, data, flags=re.MULTILINE | re.DOTALL)
    if not match:
        log.warning(f"Container for {self.component_name} could not be extracted from {self.main_nf} with regex")
        return ""

    # Group 2 excludes the surrounding quotes.
    return match.group(2)
'{{ singularity_container if singularity_container else 'https://depot.galaxyproject.org/singularity/YOUR-TOOL-HERE' }}': '{{ docker_container if docker_container else 'biocontainers/YOUR-TOOL-HERE' }}' }" diff --git a/nf_core/module-template/meta.yml b/nf_core/module-template/meta.yml index e9e5c114bd..4884033157 100644 --- a/nf_core/module-template/meta.yml +++ b/nf_core/module-template/meta.yml @@ -73,3 +73,5 @@ authors: - "{{ author }}" maintainers: - "{{ author }}" + +# TODO container-conversion: Add "containers" section diff --git a/nf_core/modules/containers.py b/nf_core/modules/containers.py new file mode 100644 index 0000000000..f21d092108 --- /dev/null +++ b/nf_core/modules/containers.py @@ -0,0 +1,220 @@ +import logging +from concurrent.futures import ThreadPoolExecutor, as_completed +from pathlib import Path +from urllib.parse import quote + +import requests +import yaml + +from nf_core.utils import CONTAINER_PLATFORMS, CONTAINER_SYSTEMS, run_cmd + +log = logging.getLogger(__name__) + + +class ModuleContainers: + """ + Helpers for building, linting and listing module containers. + """ + + IMAGE_KEY = "name" + BUILD_ID_KEY = "buildId" + SCAN_ID_KEY = "scanId" + LOCK_FILE_KEY = "lock_file" + + def __init__(self, module: str, directory: str | Path = "."): + self.directory = Path(directory) + self.module = module + self.module_directory = self.get_module_dir(module) + self.condafile = self.get_environment_path(self.module_directory) + self.metafile = self.get_metayaml_path(self.module_directory) + self.containers: dict | None = None + + def create(self, await_: bool = False) -> dict[str, dict[str, dict[str, str]]]: + """ + Build docker and singularity containers for linux/amd64 and linux/arm64 using wave. 
+ """ + containers: dict = {cs: {p: dict() for p in CONTAINER_PLATFORMS} for cs in CONTAINER_SYSTEMS} + tasks = dict() + threads = max(len(CONTAINER_SYSTEMS) * len(CONTAINER_PLATFORMS), 1) + with ThreadPoolExecutor(max_workers=threads) as pool: + for cs in CONTAINER_SYSTEMS: + for platform in CONTAINER_PLATFORMS: + fut = pool.submit(self.request_container, cs, platform, self.condafile, await_) + tasks[fut] = (cs, platform) + + for fut in as_completed(tasks): + cs, platform = tasks[fut] + # Add container info for all container systems + containers[cs][platform] = fut.result() + + # Add conda lock information based on info for docker container + if cs != "docker": + continue + + build_id = containers[cs][platform].get(self.BUILD_ID_KEY, "") + if not build_id: + log.debug("Docker image for {platform} missing - Conda-lock skipped") + continue + + conda_data = containers.get("conda", dict()) + conda_data.update({platform: {self.LOCK_FILE_KEY: self.get_conda_lock_url(build_id)}}) + containers["conda"] = conda_data + + self.containers = containers + return containers + + @classmethod + def request_container(cls, container_system: str, platform: str, conda_file: Path, await_build=False) -> dict: + assert conda_file.exists() + assert container_system in CONTAINER_SYSTEMS + assert platform in CONTAINER_PLATFORMS + + container: dict[str, str] = dict() + exectuable = "wave" + args = ["--conda-file", str(conda_file.absolute()), "--freeze", "--platform", platform, "-o yaml"] + if container_system == "singularity": + args.append("--singularity") + if await_build: + args.append("--await") + + args_str = " ".join(args) + log.debug(f"Wave command to request container ({container_system} {platform}): `wave {args_str}`") + out = run_cmd(exectuable, args_str) + + if out is None: + raise RuntimeError("Wave command did not return any output") + + try: + meta_data = yaml.safe_load(out[0].decode()) or dict() + except (KeyError, AttributeError, yaml.YAMLError) as e: + log.debug(f"Output 
yaml from wave build command: {out}") + raise RuntimeError(f"Could not parse wave YAML metadata ({container_system} {platform})") from e + + image = meta_data.get("targetImage") or meta_data.get("containerImage") or "" + if not image: + raise RuntimeError(f"Wave build ({container_system} {platform}) did not return an image name") + + container[cls.IMAGE_KEY] = image + + build_id = meta_data.get(cls.BUILD_ID_KEY, "") + if build_id: + container[cls.BUILD_ID_KEY] = build_id + + scan_id = meta_data.get(cls.SCAN_ID_KEY, "") + if scan_id: + container[cls.SCAN_ID_KEY] = scan_id + + return container + + @staticmethod + def get_conda_lock_url(build_id) -> str: + build_id_safe = quote(build_id, safe="") + url = f"https://wave.seqera.io/v1alpha1/builds/{build_id_safe}/condalock" + return url + + def get_conda_lock_file(self, platform: str) -> str: + """ + Get the conda lock file for an existing environment. + Try (in that order): + 1. reading from meta.yml + 2. reading from cached containers + 3. recreating with wave commands + """ + assert platform in CONTAINER_PLATFORMS + + containers = self.containers or self.get_containers_from_meta() or self.create() or dict() + + conda_lock_url = containers.get("conda", dict()).get(platform, dict()).get(self.LOCK_FILE_KEY) + if not conda_lock_url: + raise ValueError("") + + return self.request_conda_lock_file(conda_lock_url) + + @staticmethod + def request_conda_lock_file(conda_lock_url: str) -> str: + resp = requests.get(conda_lock_url) + return resp.text + + # def lint(self, module: str) -> list[str]: + # """ + # Confirm containers are defined for the module. + # """ + # return self._containers_from_meta(self._resolve_module_dir(module)) + + def list_containers(self) -> list[tuple[str, str, str]]: + """ + Return containers defined in the module meta.yml as a list of (, , ). 
+ """ + containers_valid = self.get_containers_from_meta() + containers_flat = [ + (cs, p, containers_valid[cs][p]["name"]) for cs in CONTAINER_SYSTEMS for p in CONTAINER_PLATFORMS + ] + return containers_flat + + def get_module_dir(self, module: str | Path) -> Path: + if module is None: + raise ValueError("Please specify a module name.") + + module_dir = Path(self.directory, "modules", "nf-core", module) + if not module_dir.exists(): + raise ValueError(f"Module '{module}' not found at {module_dir}") + + return module_dir + + @staticmethod + def get_environment_path(module_dir: Path) -> Path: + env_path = module_dir / "environment.yml" + if not env_path.exists(): + raise FileNotFoundError(f"environment.yml not found for module at {module_dir}") + return env_path + + @staticmethod + def get_metayaml_path(module_dir: Path) -> Path: + metayaml_path = module_dir / "meta.yml" + if not metayaml_path.exists(): + raise FileNotFoundError(f"meta.yml not found for module at {module_dir}") + return metayaml_path + + def get_meta(self) -> dict: + with open(self.metafile) as f: + meta = yaml.safe_load(f) + return meta + + def get_containers_from_meta(self) -> dict: + """ + Return containers defined in the module meta.yml. 
+ """ + assert self.metafile.exists() + + meta = self.get_meta() + containers = meta.get("containers", dict()) + if not containers: + log.warning(f"Section 'containers' missing from meta.yaml for module '{self.module}'") + + for system in CONTAINER_SYSTEMS: + cs = containers.get(system) + if not cs: + raise ValueError(f"Container missing for {cs}") + + for pf in CONTAINER_PLATFORMS: + spec = containers.get(pf) + if not spec: + raise ValueError(f"Platform build {pf} missing for {cs} container for module {self.module}") + + return containers + + def update_containers_in_meta(self) -> None: + if self.containers is None: + log.debug("Containers not initialized - running `create()` ...") + self.create() + + meta = self.get_meta() + meta_containers = meta.get("containers", dict()) + meta_containers.update(self.containers) + meta["containers"] = meta_containers + + # TODO container-conversion: sort the yaml (again) -> call linting? + + out = yaml.dump(meta) + with open(self.metafile, "w") as f: + f.write(out) diff --git a/nf_core/modules/lint/__init__.py b/nf_core/modules/lint/__init__.py index 71a79d0de5..6ca845c42e 100644 --- a/nf_core/modules/lint/__init__.py +++ b/nf_core/modules/lint/__init__.py @@ -236,6 +236,7 @@ def lint_module( mod.get_inputs_from_main_nf() mod.get_outputs_from_main_nf() mod.get_topics_from_main_nf() + # TODO container-conversion: get_containers from main_nf # Update meta.yml file if requested if self.fix and mod.meta_yml is not None: self.update_meta_yml_file(mod) @@ -263,6 +264,8 @@ def lint_module( mod.get_inputs_from_main_nf() mod.get_outputs_from_main_nf() mod.get_topics_from_main_nf() + # TODO container-conversion: get_containers from main_nf + # Update meta.yml file if requested if self.fix: self.update_meta_yml_file(mod) @@ -324,7 +327,13 @@ def _find_meta_info(meta_yml: dict, element_name: str, is_output=False) -> dict: return {} def _sort_meta_yml(meta_yml: dict) -> dict: - """Ensure topics comes after input/output and before authors""" 
@contextmanager
def set_wd_tempdir() -> Generator[None, None, None]:
    """
    Run the managed block with a fresh temporary directory as working directory.

    The temporary directory is created on entry and removed on exit. The
    change of directory is delegated to `set_wd`, entered after the directory
    exists and left before it is deleted.
    """
    with tempfile.TemporaryDirectory() as scratch_dir, set_wd(Path(scratch_dir)):
        yield