diff --git a/changelog.d/pr-201.md b/changelog.d/pr-201.md new file mode 100644 index 00000000..719d28af --- /dev/null +++ b/changelog.d/pr-201.md @@ -0,0 +1,3 @@ +### 🚀 Enhancements and New Features + +- WiP: introduce cont_dspath to contain what img_dspath was and fix img_dspath. [PR #201](https://github.com/datalad/datalad-container/pull/201) (by [@yarikoptic](https://github.com/yarikoptic)) diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index 2c2b69b9..209698bb 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -140,6 +140,8 @@ class ContainersAdd(Interface): this container, e.g. "singularity exec {img} {cmd}". Where '{img}' is a placeholder for the path to the container image and '{cmd}' is replaced with the desired command. Additional placeholders: + '{cont_dspath}' is relative path to the dataset containing container + specification in its config, '{img_dspath}' is relative path to the dataset containing the image, '{img_dirpath}' is the directory containing the '{img}'. """, @@ -150,7 +152,7 @@ class ContainersAdd(Interface): args=("--extra-input",), doc="""Additional file the container invocation depends on (e.g. overlays used in --call-fmt). Can be specified multiple times. - Similar to --call-fmt, the placeholders {img_dspath} and + Similar to --call-fmt, the placeholders {cont_dspath}, {img_dspath} and {img_dirpath} are available. Will be stored in the dataset config and later added alongside the container image to the `extra_inputs` field in the run-record and thus automatically be fetched when diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 50de78db..79ae94a0 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -9,7 +9,7 @@ from datalad.interface.base import build_doc from datalad.support.param import Parameter from datalad.distribution.dataset import datasetmethod -from datalad.distribution.dataset import require_dataset +from datalad.distribution.dataset import require_dataset, get_dataset_root from datalad.interface.base import eval_results from datalad.utils import ensure_iter @@ -85,14 +85,23 @@ def __call__(cmd, container_name=None, dataset=None, yield res assert container, "bug: container should always be defined here" - image_path = op.relpath(container["path"], pwd) # container record would contain path to the (sub)dataset containing # it. If not - take current dataset, as it must be coming from it - image_dspath = op.relpath(container.get('parentds', ds.path), pwd) + cont_dspath = op.relpath(container.get('parentds', ds.path), pwd) + image_path = container["path"] + # container definition might point to an image in some nested dataset. + # it might be useful to be distinguish between the two in such cases + image_dspath = op.relpath(get_dataset_root(image_path), pwd) + image_path = op.relpath(image_path, pwd) # sure we could check whether the container image is present, # but it might live in a subdataset that isn't even installed yet # let's leave all this business to `get` that is called by `run` + common_kwargs = dict( + cont_dspath=cont_dspath, + img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", + ) cmd = normalize_command(cmd) # expand the command with container execution @@ -110,13 +119,13 @@ def __call__(cmd, container_name=None, dataset=None, raise ValueError( 'cmdexe {!r} is in an old, unsupported format. ' 'Convert it to a plain string.'.format(callspec)) + + cmd_kwargs = dict( + img=image_path, + cmd=cmd, + **common_kwargs, + ) try: - cmd_kwargs = dict( - img=image_path, - cmd=cmd, - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", - ) cmd = callspec.format(**cmd_kwargs) except KeyError as exc: yield get_status_dict( @@ -134,13 +143,9 @@ def __call__(cmd, container_name=None, dataset=None, cmd = container['path'] + ' ' + cmd extra_inputs = [] - for extra_input in ensure_iter(container.get("extra-input",[]), set): + for extra_input in ensure_iter(container.get("extra-input", []), set): try: - xi_kwargs = dict( - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", - ) - extra_inputs.append(extra_input.format(**xi_kwargs)) + extra_inputs.append(extra_input.format(**common_kwargs)) except KeyError as exc: yield get_status_dict( 'run', @@ -150,7 +155,7 @@ def __call__(cmd, container_name=None, dataset=None, 'Unrecognized extra_input placeholder: %s. ' 'See containers-add for information on known ones: %s', exc, - ", ".join(xi_kwargs))) + ", ".join(common_kwargs))) return lgr.debug("extra_inputs = %r", extra_inputs) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index 4ff3248b..554903c1 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -166,6 +166,7 @@ def test_custom_call_fmt(path=None, local_file=None): url=get_local_file_url(op.join(local_file, 'some_container.img')), image='righthere', call_fmt='echo image={img} cmd={cmd} img_dspath={img_dspath} ' + 'cont_dspath={cont_dspath} ' # and environment variable being set/propagated by default 'name=$DATALAD_CONTAINER_NAME' ) @@ -175,13 +176,13 @@ def test_custom_call_fmt(path=None, local_file=None): out = WitlessRunner(cwd=subds.path).run( ['datalad', 'containers-run', '-n', 'mycontainer', 'XXX'], protocol=StdOutCapture) - assert_in('image=righthere cmd=XXX img_dspath=. name=mycontainer', + assert_in('image=righthere cmd=XXX img_dspath=. cont_dspath=. name=mycontainer', out['stdout']) out = WitlessRunner(cwd=ds.path).run( ['datalad', 'containers-run', '-n', 'sub/mycontainer', 'XXX'], protocol=StdOutCapture) - assert_in('image=sub/righthere cmd=XXX img_dspath=sub', out['stdout']) + assert_in('image=sub/righthere cmd=XXX img_dspath=sub cont_dspath=sub', out['stdout']) # Test within subdirectory of the super-dataset subdir = op.join(ds.path, 'subdir') @@ -189,7 +190,20 @@ def test_custom_call_fmt(path=None, local_file=None): out = WitlessRunner(cwd=subdir).run( ['datalad', 'containers-run', '-n', 'sub/mycontainer', 'XXX'], protocol=StdOutCapture) - assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub', out['stdout']) + assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub cont_dspath=../sub', + out['stdout']) + + # Add to super a container definition pointing to the image within sub + ds.containers_add( + 'sub-mycontainer', + # we point into subdataset + image='sub/righthere', + call_fmt=subds.config.get('datalad.containers.mycontainer.cmdexec') + ) + out = WitlessRunner(cwd=ds.path).run( + ['datalad', 'containers-run', '-n', 'sub-mycontainer', 'XXX'], + protocol=StdOutCapture) + assert_in('image=sub/righthere cmd=XXX img_dspath=sub cont_dspath=.', out['stdout']) @with_tree( @@ -223,14 +237,12 @@ def test_extra_inputs(path=None): ) commit_msg = ds.repo.call_git(["show", "--format=%B"]) cmd, runinfo = get_run_info(ds, commit_msg) - assert set( - [ - "sub/containers/container.img", - "overlay1.img", - "sub/containers/../overlays/overlay2.img", - "sub/overlays/overlay3.img", - ] - ) == set(runinfo.get("extra_inputs", set())) + assert { + "sub/containers/container.img", + "overlay1.img", + "sub/containers/../overlays/overlay2.img", + "sub/overlays/overlay3.img", + } == set(runinfo.get("extra_inputs", set())) @skip_if_no_network