Skip to content

Commit

Permalink
Minor meta-recipe features (#49)
Browse files Browse the repository at this point in the history
* Add prediction to dir-path for meta-recipes using an ID

* New unit tests for predict-path with 'dir-path' and 'id' arguments for meta-recipes

* Update search results for meta-recipes

* Unit test for searching for meta-recipes

* Update version to 1.1.1
  • Loading branch information
mikecormier authored Jan 1, 2021
1 parent 818421d commit 25f0491
Show file tree
Hide file tree
Showing 5 changed files with 195 additions and 55 deletions.
2 changes: 1 addition & 1 deletion ggd/__init__.py
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
__version__ = "1.1.0"
__version__ = "1.1.1"

123 changes: 79 additions & 44 deletions ggd/predict_path.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,53 @@ def add_predict_path(p):
help="Predict the install file path of a data package that hasn't been installed yet. (Use for workflows, such as Snakemake)",
description="Get a predicted install file path for a data package before it is installed. (Use for workflows, such as Snakemake)",
)

c.add_argument(
"-c",
"--channel",
default="genomics",
choices=[str(x) for x in get_ggd_channels()],
help="The ggd channel of the recipe to find. (Default = genomics)",

)
c.add_argument(
"--prefix",
default=None,
help="(Optional) The name or the full directory path to an conda environment. The predicted path will be based on this conda environment. When installing, the data package should also be installed in this environment. (Only needed if not predicting for a path in the current conda environment)",
)
c2 = c.add_argument_group("required arguments")

c.add_argument(
"--id",
metavar="meta-recipe ID",
default = None,
help = "(Optional) The ID to predict the path for if the package is a meta-recipe. If it is not a meta-recipe it will be ignored"
)

c2 = c.add_argument_group("One Argument Required")

c2.add_argument(
"-pn",
"--package-name",
required=True,
help="(Required) The name of the data package to predict a file path for",

"--dir-path",
action="store_true",
help = "(Required if '--file-name' not used) Whether or not to get the predicted directory path rather then the predicted file path. If both --file-name and --dir-path are provided the --file-name will be used and --dir-path will be ignored",
)

c2.add_argument(
"-fn",
"--file-name",
default = None,
help="(Required if '--dir-path' not used) The name of the file to predict that path for. It is best if you give the full and correct name of the file to predict the path for. If not, ggd will try to identify the right file, but won't guarantee that it is the right file",
)

c3 = c.add_argument_group("Required Arguments")

c3.add_argument(
"-pn",
"--package-name",
required=True,
help="(Required) The name of the file to predict that path for. It is best if you give the full and correct name of the file to predict the path for. If not, ggd will try to identify the right file, but won't guarantee that it is the right file",
help="(Required) The name of the data package to predict a file path for",
)

c.set_defaults(func=predict_path)


Expand Down Expand Up @@ -89,7 +111,12 @@ def predict_path(parser, args):
import os
import re

from .utils import conda_root, get_conda_prefix_path, prefix_in_conda
from .utils import check_for_meta_recipes, conda_root, get_conda_prefix_path, prefix_in_conda
from .install import get_idname_from_metarecipe

if not args.dir_path and args.file_name is None:
print(":ggd:predict-path: !!ERROR!! Either the '--file-name' or the '--dir-path' argument is required. Neither was given")
sys.exit()

## get prefix
CONDA_ROOT = (
Expand All @@ -109,60 +136,68 @@ def predict_path(parser, args):
)
)

## Check there is a "final-files" in the metadata for the package
if (
"final-files" not in metadata_dict["packages"][args.package_name]["tags"]
or len(
metadata_dict["packages"][args.package_name]["tags"].get("final-files", [])
)
== 0
):
sys.exit(
"\n:ggd:predict-path: The {p} data package does not have the final data files listed. This packages needs to be updated. To update, contact the GoGetData team at https://github.com/gogetdata/ggd-recipes\n".format(
p=args.package_name
)
)
if args.file_name is not None:

## Check that the file is one of the final-files listed in the metadata
if (
args.file_name
not in metadata_dict["packages"][args.package_name]["tags"]["final-files"]
):
matching_files = [
x
for x in metadata_dict["packages"][args.package_name]["tags"]["final-files"]
if re.search(args.file_name, x)
]
if len(matching_files) > 0:
## Chose the first file that matched
file_name = matching_files[0]
else:
## Check there is a "final-files" in the metadata for the package
if (
"final-files" not in metadata_dict["packages"][args.package_name]["tags"]
or len(
metadata_dict["packages"][args.package_name]["tags"].get("final-files", [])
)
== 0
):
sys.exit(
"\n:ggd:predict-path: The {f} file is not one of the files listed for this package. The files installed by this package are: \n\t\t{fo}".format(
f=args.file_name,
fo="\n\t\t".join(
metadata_dict["packages"][args.package_name]["tags"][
"final-files"
]
),
"\n:ggd:predict-path: The {p} data package does not have the final data files listed. This packages needs to be updated. To update, contact the GoGetData team at https://github.com/gogetdata/ggd-recipes\n".format(
p=args.package_name
)
)
else:
file_name = args.file_name

## Check that the file is one of the final-files listed in the metadata
if (
args.file_name
not in metadata_dict["packages"][args.package_name]["tags"]["final-files"]
):
matching_files = [
x
for x in metadata_dict["packages"][args.package_name]["tags"]["final-files"]
if re.search(args.file_name, x)
]
if len(matching_files) > 0:
## Chose the first file that matched
file_name = matching_files[0]
else:
sys.exit(
"\n:ggd:predict-path: The {f} file is not one of the files listed for this package. The files installed by this package are: \n\t\t{fo}".format(
f=args.file_name,
fo="\n\t\t".join(
metadata_dict["packages"][args.package_name]["tags"][
"final-files"
]
),
)
)
else:
file_name = args.file_name

elif args.dir_path:
file_name = ""


## Get path information
species = metadata_dict["packages"][args.package_name]["identifiers"]["species"]
build = metadata_dict["packages"][args.package_name]["identifiers"]["genome-build"]
version = metadata_dict["packages"][args.package_name]["version"]

name = args.package_name if not check_for_meta_recipes(args.package_name, metadata_dict) else get_idname_from_metarecipe(args.id.lower(), args.package_name, metadata_dict) if args.id is not None else args.package_name

## Print the path
path = os.path.join(
CONDA_ROOT,
"share",
"ggd",
species,
build,
args.package_name,
name,
version,
file_name,
)
Expand Down
5 changes: 4 additions & 1 deletion ggd/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,10 @@ def print_summary(search_terms, json_dict, match_list, installed_pkgs, installed
% installed_paths[pkg]
)
else:
results.append("\n\tTo install run:\n\t\tggd install %s" % pkg)
from .utils import check_for_meta_recipes

results.append("\n\tTo install run:\n\t\tggd install %s %s" %(pkg, "--id <meta-recipe ID>" if check_for_meta_recipes(pkg,json_dict) else "" ))

print("\n\n".join(results))
print("\n", dash)

Expand Down
90 changes: 81 additions & 9 deletions tests/test_info_scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -1505,7 +1505,7 @@ def test_predict_path():
## Testing with grch37-autosomal-dominant-genes-berg-v1 data package

## Test bad package name
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='bad_package_name-grch37-autosomal-dominant-genes-berg-v1', prefix=None)
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='bad_package_name-grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)

with pytest.raises(SystemExit) as pytest_wrapped_e:
predict_path.predict_path((), args)
Expand All @@ -1514,7 +1514,7 @@ def test_predict_path():


## Test bad file name
args = Namespace(channel='genomics', command='predict-path', file_name='autodom-genes-berg', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None)
args = Namespace(channel='genomics', command='predict-path', file_name='autodom-genes-berg', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)

with pytest.raises(SystemExit) as pytest_wrapped_e:
predict_path.predict_path((), args)
Expand All @@ -1523,7 +1523,7 @@ def test_predict_path():


## Test closest file name
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None)
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
Expand All @@ -1533,7 +1533,7 @@ def test_predict_path():


## Test closest file name
args = Namespace(channel='genomics', command='predict-path', file_name='berg-v1.compliment', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None)
args = Namespace(channel='genomics', command='predict-path', file_name='berg-v1.compliment', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
Expand All @@ -1543,7 +1543,7 @@ def test_predict_path():


## Test full name file name
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None)
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
Expand All @@ -1552,6 +1552,17 @@ def test_predict_path():
assert os.path.join(utils.conda_root(),"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output)


## Test no file-name or dir-path
args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)

temp_stdout = StringIO()
with pytest.raises(SystemExit) as pytest_wrapped_e, redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
assert "SystemExit" in str(pytest_wrapped_e.exconly()) ## test that SystemExit was raised by sys.exit()
output = temp_stdout.getvalue().strip()
assert ":ggd:predict-path: !!ERROR!! Either the '--file-name' or the '--dir-path' argument is required. Neither was given" in output


## Test prediction in different environment
### Temp conda environment
temp_env = os.path.join(utils.conda_root(), "envs", "predict-path")
Expand All @@ -1565,14 +1576,76 @@ def test_predict_path():
sp.check_output(["conda", "create", "--name", "predict-path"])

## Test full name file name
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env)
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = False, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output)


## Test full name file name and that the ID is ignored for a non meta-recipe
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = False, id = "SOME ID")

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output)


## Test full name file name and dir-path. (File name should be used over dir path)
args = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = True, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") in str(output)

## Test dir path
args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = True, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1") in str(output)
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") not in str(output)


## Test dir path and that the ID is ignored for a non meta-recipe
args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=temp_env, dir_path = True, id = "SOME_ID")

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1") in str(output)
assert os.path.join(temp_env,"share","ggd", "Homo_sapiens","GRCh37","grch37-autosomal-dominant-genes-berg-v1","1","grch37-autosomal-dominant-genes-berg-v1.bed.gz.tbi") not in str(output)


## Test meta-recipe without an ID
args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='meta-recipe-geo-accession-geo-v1', prefix=temp_env, dir_path = True, id = None)

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "meta-recipe","meta-recipe","meta-recipe-geo-accession-geo-v1","1") in str(output)


## Test meta-recipe with an ID and that the id is set to lower case
args = Namespace(channel='genomics', command='predict-path', file_name=None, package_name='meta-recipe-geo-accession-geo-v1', prefix=temp_env, dir_path = True, id = "GSE123")

temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args)
output = temp_stdout.getvalue().strip()
assert os.path.join(temp_env,"share","ggd", "meta-recipe","meta-recipe","gse123-geo-v1","1") in str(output)


## Remove temp env created in test_get_environment_variables()
sp.check_output(["conda", "env", "remove", "--name", "predict-path"])
try:
Expand All @@ -1594,16 +1667,15 @@ def test_predict_path():
output = str(temp_stdout.getvalue().strip())
assert os.path.exists(str(output))

args2 = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None)
args2 = Namespace(channel='genomics', command='predict-path', file_name='grch37-autosomal-dominant-genes-berg-v1.bed.gz', package_name='grch37-autosomal-dominant-genes-berg-v1', prefix=None, dir_path = False, id = None)
temp_stdout = StringIO()
with redirect_stdout(temp_stdout):
predict_path.predict_path((), args2)
output2 = temp_stdout.getvalue().strip()

assert str(output2) == str(output)

args = Namespace(channel='genomics', command='uninstall', names=["grch37-autosomal-dominant-genes-berg-v1"])
uninstall.uninstall((),args)
sp.check_call(["ggd","uninstall","grch37-autosomal-dominant-genes-berg-v1"])


#--------------------------------------------------------
Expand Down
Loading

0 comments on commit 25f0491

Please sign in to comment.