From a965c5744c815a629455d570c2f5971dd771bf89 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 8 Mar 2023 16:30:51 -0500 Subject: [PATCH 1/7] WiP: introduce cont_dspath to contain what img_dspath was and fix img_dspath to point to actual ds containing the img TODOs: add formatting of img_dspath etc based on cont_dspath. For now committing/testing as is to see if breaks anything (should not) --- datalad_container/containers_run.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index 50de78db..af25b52b 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -9,7 +9,7 @@ from datalad.interface.base import build_doc from datalad.support.param import Parameter from datalad.distribution.dataset import datasetmethod -from datalad.distribution.dataset import require_dataset +from datalad.distribution.dataset import require_dataset, get_dataset_root from datalad.interface.base import eval_results from datalad.utils import ensure_iter @@ -88,11 +88,18 @@ def __call__(cmd, container_name=None, dataset=None, image_path = op.relpath(container["path"], pwd) # container record would contain path to the (sub)dataset containing # it. If not - take current dataset, as it must be coming from it - image_dspath = op.relpath(container.get('parentds', ds.path), pwd) - + cont_dspath = op.relpath(container.get('parentds', ds.path), pwd) + # container definition might point to an image in some nested dataset. + # it might be useful to be distinguish between the two in such cases + image_dspath = get_dataset_root(image_path) # sure we could check whether the container image is present, # but it might live in a subdataset that isn't even installed yet # let's leave all this business to `get` that is called by `run` + common_kwargs = dict( + cont_dspath=cont_dspath, + img_dspath=image_dspath, + img_dirpath=op.dirname(image_path) or ".", + ) cmd = normalize_command(cmd) # expand the command with container execution @@ -114,8 +121,7 @@ def __call__(cmd, container_name=None, dataset=None, cmd_kwargs = dict( img=image_path, cmd=cmd, - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", + **common_kwargs, ) cmd = callspec.format(**cmd_kwargs) except KeyError as exc: @@ -136,11 +142,7 @@ def __call__(cmd, container_name=None, dataset=None, extra_inputs = [] for extra_input in ensure_iter(container.get("extra-input",[]), set): try: - xi_kwargs = dict( - img_dspath=image_dspath, - img_dirpath=op.dirname(image_path) or ".", - ) - extra_inputs.append(extra_input.format(**xi_kwargs)) + extra_inputs.append(extra_input.format(**common_kwargs)) except KeyError as exc: yield get_status_dict( 'run', From b1bf65f566c2611fd6885a96e90b2f95daef0986 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 8 Mar 2023 16:42:31 -0500 Subject: [PATCH 2/7] Add ability to format path with cont_dspath (+some touches around) Note that description of {img_*} placeholders did not have to change at all -- so to some degree the changes actually fixed it to become corresponding with the description. --- datalad_container/containers_add.py | 4 +++- datalad_container/containers_run.py | 33 ++++++++++++++++++++++------- 2 files changed, 28 insertions(+), 9 deletions(-) diff --git a/datalad_container/containers_add.py b/datalad_container/containers_add.py index 2c2b69b9..209698bb 100644 --- a/datalad_container/containers_add.py +++ b/datalad_container/containers_add.py @@ -140,6 +140,8 @@ class ContainersAdd(Interface): this container, e.g. "singularity exec {img} {cmd}". Where '{img}' is a placeholder for the path to the container image and '{cmd}' is replaced with the desired command. Additional placeholders: + '{cont_dspath}' is relative path to the dataset containing container + specification in its config, '{img_dspath}' is relative path to the dataset containing the image, '{img_dirpath}' is the directory containing the '{img}'. """, @@ -150,7 +152,7 @@ class ContainersAdd(Interface): args=("--extra-input",), doc="""Additional file the container invocation depends on (e.g. overlays used in --call-fmt). Can be specified multiple times. - Similar to --call-fmt, the placeholders {img_dspath} and + Similar to --call-fmt, the placeholders {cont_dspath}, {img_dspath} and {img_dirpath} are available. Will be stored in the dataset config and later added alongside the container image to the `extra_inputs` field in the run-record and thus automatically be fetched when diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index af25b52b..d158119a 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -85,10 +85,26 @@ def __call__(cmd, container_name=None, dataset=None, yield res assert container, "bug: container should always be defined here" - image_path = op.relpath(container["path"], pwd) # container record would contain path to the (sub)dataset containing # it. If not - take current dataset, as it must be coming from it cont_dspath = op.relpath(container.get('parentds', ds.path), pwd) + + image_path = container["path"] + try: + image_path = image_path.format(cont_dspath=cont_dspath) + except KeyError as exc: + yield get_status_dict( + 'run', + ds=ds, + status='error', + message=( + 'Unrecognized path placeholder: %s. ' + 'See containers-add for information on known ones: %s', + exc, + ", ".join(['cont_dspath']))) + return + image_path = op.relpath(image_path, pwd) + # container definition might point to an image in some nested dataset. # it might be useful to be distinguish between the two in such cases image_dspath = get_dataset_root(image_path) @@ -117,12 +133,13 @@ def __call__(cmd, container_name=None, dataset=None, raise ValueError( 'cmdexe {!r} is in an old, unsupported format. ' 'Convert it to a plain string.'.format(callspec)) + + cmd_kwargs = dict( + img=image_path, + cmd=cmd, + **common_kwargs, + ) try: - cmd_kwargs = dict( - img=image_path, - cmd=cmd, - **common_kwargs, - ) cmd = callspec.format(**cmd_kwargs) except KeyError as exc: yield get_status_dict( @@ -140,7 +157,7 @@ def __call__(cmd, container_name=None, dataset=None, cmd = container['path'] + ' ' + cmd extra_inputs = [] - for extra_input in ensure_iter(container.get("extra-input",[]), set): + for extra_input in ensure_iter(container.get("extra-input", []), set): try: extra_inputs.append(extra_input.format(**common_kwargs)) except KeyError as exc: @@ -152,7 +169,7 @@ def __call__(cmd, container_name=None, dataset=None, 'Unrecognized extra_input placeholder: %s. ' 'See containers-add for information on known ones: %s', exc, - ", ".join(xi_kwargs))) + ", ".join(common_kwargs))) return lgr.debug("extra_inputs = %r", extra_inputs) From 9a9e82496993e9a5cde5557451212363494823d7 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 8 Mar 2023 17:36:26 -0500 Subject: [PATCH 3/7] Styling since pycharm was complaining --- datalad_container/tests/test_run.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index 4ff3248b..9e9cc7f9 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -223,14 +223,12 @@ def test_extra_inputs(path=None): ) commit_msg = ds.repo.call_git(["show", "--format=%B"]) cmd, runinfo = get_run_info(ds, commit_msg) - assert set( - [ - "sub/containers/container.img", - "overlay1.img", - "sub/containers/../overlays/overlay2.img", - "sub/overlays/overlay3.img", - ] - ) == set(runinfo.get("extra_inputs", set())) + assert { + "sub/containers/container.img", + "overlay1.img", + "sub/containers/../overlays/overlay2.img", + "sub/overlays/overlay3.img", + } == set(runinfo.get("extra_inputs", set())) @skip_if_no_network From c3e069d56837df44c954d568e7b70414e147f83a Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 8 Mar 2023 17:36:52 -0500 Subject: [PATCH 4/7] BF: deduce dataset root on full path before taking relative --- datalad_container/containers_run.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index d158119a..ab99b830 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -103,11 +103,10 @@ def __call__(cmd, container_name=None, dataset=None, exc, ", ".join(['cont_dspath']))) return - image_path = op.relpath(image_path, pwd) - # container definition might point to an image in some nested dataset. # it might be useful to be distinguish between the two in such cases - image_dspath = get_dataset_root(image_path) + image_dspath = op.relpath(get_dataset_root(image_path), pwd) + image_path = op.relpath(image_path, pwd) # sure we could check whether the container image is present, # but it might live in a subdataset that isn't even installed yet # let's leave all this business to `get` that is called by `run` From ae5b444d829568e253144c7f6058c967a6e40411 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 8 Mar 2023 20:38:48 -0500 Subject: [PATCH 5/7] RM: remove trying to provide cont_path to container image config Because then we would need to format in other places like for get etc, not to mention that likely in reproman would need to do more magic. Since it makes sense ATM only to have path from the location where container is defined, just keep it that way and not bother interpolating. --- datalad_container/containers_run.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/datalad_container/containers_run.py b/datalad_container/containers_run.py index ab99b830..79ae94a0 100644 --- a/datalad_container/containers_run.py +++ b/datalad_container/containers_run.py @@ -90,19 +90,6 @@ def __call__(cmd, container_name=None, dataset=None, cont_dspath = op.relpath(container.get('parentds', ds.path), pwd) image_path = container["path"] - try: - image_path = image_path.format(cont_dspath=cont_dspath) - except KeyError as exc: - yield get_status_dict( - 'run', - ds=ds, - status='error', - message=( - 'Unrecognized path placeholder: %s. ' - 'See containers-add for information on known ones: %s', - exc, - ", ".join(['cont_dspath']))) - return # container definition might point to an image in some nested dataset. # it might be useful to be distinguish between the two in such cases image_dspath = op.relpath(get_dataset_root(image_path), pwd) From b87cafc10080fa5bec9a91e59a8ee1a6b615dee1 Mon Sep 17 00:00:00 2001 From: Yaroslav Halchenko Date: Wed, 8 Mar 2023 20:39:17 -0500 Subject: [PATCH 6/7] TST: Extend test_custom_call_fmt test to test cont_dspath as well --- datalad_container/tests/test_run.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/datalad_container/tests/test_run.py b/datalad_container/tests/test_run.py index 9e9cc7f9..554903c1 100644 --- a/datalad_container/tests/test_run.py +++ b/datalad_container/tests/test_run.py @@ -166,6 +166,7 @@ def test_custom_call_fmt(path=None, local_file=None): url=get_local_file_url(op.join(local_file, 'some_container.img')), image='righthere', call_fmt='echo image={img} cmd={cmd} img_dspath={img_dspath} ' + 'cont_dspath={cont_dspath} ' # and environment variable being set/propagated by default 'name=$DATALAD_CONTAINER_NAME' ) @@ -175,13 +176,13 @@ def test_custom_call_fmt(path=None, local_file=None): out = WitlessRunner(cwd=subds.path).run( ['datalad', 'containers-run', '-n', 'mycontainer', 'XXX'], protocol=StdOutCapture) - assert_in('image=righthere cmd=XXX img_dspath=. name=mycontainer', + assert_in('image=righthere cmd=XXX img_dspath=. cont_dspath=. name=mycontainer', out['stdout']) out = WitlessRunner(cwd=ds.path).run( ['datalad', 'containers-run', '-n', 'sub/mycontainer', 'XXX'], protocol=StdOutCapture) - assert_in('image=sub/righthere cmd=XXX img_dspath=sub', out['stdout']) + assert_in('image=sub/righthere cmd=XXX img_dspath=sub cont_dspath=sub', out['stdout']) # Test within subdirectory of the super-dataset subdir = op.join(ds.path, 'subdir') @@ -189,7 +190,20 @@ def test_custom_call_fmt(path=None, local_file=None): out = WitlessRunner(cwd=subdir).run( ['datalad', 'containers-run', '-n', 'sub/mycontainer', 'XXX'], protocol=StdOutCapture) - assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub', out['stdout']) + assert_in('image=../sub/righthere cmd=XXX img_dspath=../sub cont_dspath=../sub', + out['stdout']) + + # Add to super a container definition pointing to the image within sub + ds.containers_add( + 'sub-mycontainer', + # we point into subdataset + image='sub/righthere', + call_fmt=subds.config.get('datalad.containers.mycontainer.cmdexec') + ) + out = WitlessRunner(cwd=ds.path).run( + ['datalad', 'containers-run', '-n', 'sub-mycontainer', 'XXX'], + protocol=StdOutCapture) + assert_in('image=sub/righthere cmd=XXX img_dspath=sub cont_dspath=.', out['stdout']) @with_tree( From 7078de3f263e74328ff01569daab48d100a07953 Mon Sep 17 00:00:00 2001 From: DataLad Bot Date: Thu, 9 Mar 2023 01:41:41 +0000 Subject: [PATCH 7/7] [release-action] Autogenerate changelog snippet for PR 201 --- changelog.d/pr-201.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 changelog.d/pr-201.md diff --git a/changelog.d/pr-201.md b/changelog.d/pr-201.md new file mode 100644 index 00000000..719d28af --- /dev/null +++ b/changelog.d/pr-201.md @@ -0,0 +1,3 @@ +### 🚀 Enhancements and New Features + +- WiP: introduce cont_dspath to contain what img_dspath was and fix img_dspath. [PR #201](https://github.com/datalad/datalad-container/pull/201) (by [@yarikoptic](https://github.com/yarikoptic))