Skip to content
This repository was archived by the owner on Aug 8, 2024. It is now read-only.

Commit 1211497

Browse files
authored
Merge pull request #40 from bids-standard/invalid_filters
FIXES: invalid filters, mark all xfails, etc..
2 parents b94864d + 7f85f8e commit 1211497

File tree

4 files changed

+187
-70
lines changed

4 files changed

+187
-70
lines changed

bids/layout/layout.py

Lines changed: 128 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import difflib
22
import enum
33
import os.path
4+
import typing
45
from collections import defaultdict
56
from functools import partial
67
from pathlib import Path
@@ -10,6 +11,7 @@
1011
from .models import BIDSFile
1112
from .utils import BIDSMetadata
1213
from .writing import build_path, write_to_file
14+
from ..external import inflect
1315
from ..exceptions import (
1416
BIDSEntityError,
1517
BIDSValidationError,
@@ -265,7 +267,7 @@ def __init__(
265267
database_path: Optional[str]=None,
266268
reset_database: Optional[bool]=None,
267269
indexer: Optional[Callable]=None,
268-
absolute_paths: Optional[bool]=None,
270+
absolute_paths: Optional[bool]=True,
269271
ignore: Optional[List[str]]=None,
270272
force_index: Optional[List[str]]=None,
271273
**kwargs,
@@ -296,6 +298,7 @@ def __init__(
296298
self.validationReport = None
297299

298300
self._regex_search = regex_search
301+
self._absolute_paths = absolute_paths
299302

300303
if validate:
301304
self.validationReport = self.validate()
@@ -318,11 +321,6 @@ def __init__(
318321
"indexer no longer has any effect and will be removed",
319322
DeprecationWarning
320323
)
321-
if absolute_paths is not None:
322-
warnings.warn(
323-
"absolute_paths no longer has any effect and will be removed",
324-
DeprecationWarning
325-
)
326324
if kwargs:
327325
warnings.warn(f"Unknown keyword arguments: {kwargs}")
328326
if config is not None:
@@ -340,12 +338,16 @@ def __getattr__(self, key):
340338
except KeyError:
341339
pass
342340
if key.startswith('get_'):
343-
orig_ent_name = key.replace('get_', '')
344-
ent_name = self.schema.fuzzy_match_entity(orig_ent_name).name
345-
if ent_name not in self.get_entities():
346-
raise BIDSEntityError(
347-
"'get_{}' can't be called because '{}' isn't a "
348-
"recognized entity name.".format(orig_ent_name, orig_ent_name))
341+
ent_name = key.replace('get_', '')
342+
entities = self.get_entities(metadata=True)
343+
if ent_name not in entities:
344+
sing = inflect.engine().singular_noun(ent_name)
345+
if sing in entities:
346+
ent_name = sing
347+
else:
348+
raise BIDSEntityError(
349+
"'get_{}' can't be called because '{}' isn't a "
350+
"recognized entity name.".format(ent_name, ent_name))
349351
return partial(self.get, return_type='id', target=ent_name)
350352
# Spit out default message if we get this far
351353
raise AttributeError("%s object has no attribute named %r" %
@@ -541,11 +543,88 @@ def count_matches(f):
541543
else match for match in matches]
542544
return matches if all_ else matches[0] if matches else None
543545

546+
def _sanitize_validate_query(self, target, filters, return_type, invalid_filters, regex_search):
547+
"""Sanitize and validate query parameters
548+
549+
Parameters
550+
----------
551+
target : str
552+
Name of the target entity to get results for.
553+
filters : dict
554+
Dictionary of filters to apply to the query.
555+
return_type : str
556+
The type of object to return. Must be one of 'filename',
557+
'object', or 'dir'.
558+
invalid_filters : str
559+
What to do when an invalid filter is encountered. Must be one
560+
of 'error', 'drop', or 'allow'.
561+
562+
Returns
563+
-------
564+
target : str
565+
The sanitized target.
566+
filters : dict
567+
The sanitized filters.
568+
"""
569+
# error check on users accidentally passing in filters
570+
if isinstance(filters.get('filters'), dict):
571+
raise RuntimeError('You passed in filters as a dictionary named '
572+
'filters; please pass the keys in as named '
573+
'keywords to the `get()` call. For example: '
574+
'`layout.get(**filters)`.')
575+
576+
schema_entities = [e.name for e in self.schema.EntityEnum]
577+
578+
# Provide some suggestions for target and filter names
579+
def _suggest(target):
580+
"""Suggest a valid value for an entity."""
581+
potential = list(schema_entities)
582+
suggestions = difflib.get_close_matches(target, potential)
583+
if suggestions:
584+
message = ". Did you mean one of: {}?".format(suggestions)
585+
else:
586+
message = ""
587+
return message
588+
589+
if return_type in ("dir", "id"):
590+
if target is None:
591+
raise TargetError(f'If return_type is "id" or "dir", a valid target '
592+
'entity must also be specified.')
593+
594+
if target not in schema_entities:
595+
raise TargetError(f"Unknown target '{target}'{_suggest(target)}")
596+
597+
if invalid_filters != 'allow':
598+
bad_filters = set(filters.keys()) - set(schema_entities)
599+
if bad_filters:
600+
if invalid_filters == 'drop':
601+
for bad_filt in bad_filters:
602+
filters.pop(bad_filt)
603+
elif invalid_filters == 'error':
604+
first_bad = list(bad_filters)[0]
605+
message = _suggest(first_bad)
606+
raise ValueError(
607+
f"Unknown entity '{first_bad}'{message} If you're sure you want to impose " + \
608+
"this constraint, set invalid_filters='allow'.")
609+
610+
# Process Query Enum
611+
if filters:
612+
for k, val in filters.items():
613+
if val == Query.OPTIONAL:
614+
del filters[k]
615+
elif val == Query.REQUIRED:
616+
regex_search = True # Force true if these are defined
617+
filters[k] = '.+'
618+
elif val == Query.NONE:
619+
regex_search = True
620+
filters[k] = '^$'
621+
622+
return target, filters, regex_search
544623

545624
def get(self, return_type: str = 'object', target: str = None, scope: str = None,
546625
extension: Union[str, List[str]] = None, suffix: Union[str, List[str]] = None,
547-
regex_search=None,
548-
**entities) -> Union[List[str], List[object]]:
626+
regex_search=None, invalid_filters: str = 'error',
627+
**filters) -> Union[List[str], List[object]]:
549628
"""Retrieve files and/or metadata from the current Layout.
550629
551630
Parameters
@@ -597,32 +676,25 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None
597676
list of :obj:`bids.layout.BIDSFile` or str
598677
A list of BIDSFiles (default) or strings (see return_type).
599678
"""
679+
600680
if regex_search is None:
601681
regex_search = self._regex_search
602682

603-
# Provide some suggestions if target is specified and invalid.
604-
if return_type in ("dir", "id"):
605-
if target is None:
606-
raise TargetError(f'If return_type is "id" or "dir", a valid target '
607-
'entity must also be specified.')
608-
self_entities = self.get_entities()
609-
if target not in self_entities:
610-
potential = list(self_entities.keys())
611-
suggestions = difflib.get_close_matches(target, potential)
612-
if suggestions:
613-
message = "Did you mean one of: {}?".format(suggestions)
614-
else:
615-
message = "Valid targets are: {}".format(potential)
616-
raise TargetError(f"Unknown target '{target}'. {message}")
617-
683+
# Sanitize & validate query
684+
target, filters, regex_search = self._sanitize_validate_query(target, filters, return_type,
685+
invalid_filters, regex_search)
686+
618687
folder = self.dataset
619-
result = query(folder, return_type, target, scope, extension, suffix, regex_search, **entities)
688+
689+
result = query(folder, return_type, target, scope, extension, suffix, regex_search,
690+
absolute_paths=self._absolute_paths, **filters)
691+
620692
if return_type == 'file':
621693
result = natural_sort(result)
622694
if return_type == "object":
623695
if result:
624696
result = natural_sort(
625-
[BIDSFile(res) for res in result],
697+
[BIDSFile(res, absolute_path=self._absolute_paths) for res in result],
626698
"path"
627699
)
628700
return result
@@ -631,7 +703,8 @@ def get(self, return_type: str = 'object', target: str = None, scope: str = None
631703
def entities(self):
632704
return self.get_entities()
633705

634-
def get_entities(self, scope: str = None, sort: bool = False, long_form: bool = True) -> dict:
706+
def get_entities(self, scope: str = None, sort: bool = False,
707+
long_form: bool = True, metadata: bool = True) -> dict:
635708
"""Returns a unique set of entities found within the dataset as a dict.
636709
Each key of the resulting dict contains a list of values (with at least one element).
637710
@@ -657,7 +730,29 @@ def get_entities(self, scope: str = None, sort: bool = False, long_form: bool =
657730
dict
658731
a unique set of entities found within the dataset as a dict
659732
"""
660-
return query_entities(self.dataset, scope, sort, long_form=long_form)
733+
734+
entities = query_entities(self.dataset, scope, long_form=long_form)
735+
736+
if metadata is True:
737+
results = {**entities, **self._get_unique_metadata()}
738+
739+
if sort:
740+
results = {k: sorted(v) for k, v in sorted(results.items())}
741+
742+
return results
743+
744+
def _get_unique_metadata(self):
745+
"""Return a list of all unique metadata key and values found in the dataset."""
746+
747+
all_metadata_objects = self.dataset.select(self.schema.MetadataArtifact).objects()
748+
749+
metadata = defaultdict(set)
750+
for obj in all_metadata_objects:
751+
for k, v in obj['contents'].items():
752+
if isinstance(v, typing.Hashable):
753+
metadata[k].add(v)
754+
755+
return metadata
661756

662757
def get_dataset_description(self, scope='self', all_=False) -> Union[List[Dict], Dict]:
663758
"""Return contents of dataset_description.json.
@@ -867,7 +962,6 @@ def __repr__(self):
867962
"Runs: {}".format(self.dataset.base_dir_, n_subjects, n_sessions, n_runs))
868963
return s
869964

870-
871965
class Query(enum.Enum):
872966
"""Enums for use with BIDSLayout.get()."""
873967
NONE = 1 # Entity must not be present

bids/layout/models.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -37,10 +37,13 @@ def from_filename(cls, filename):
3737
break
3838
return cls(path)
3939

40-
def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None):
40+
def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None,
41+
absolute_path=True):
42+
4143
self._path = None
4244
self._artifact = None
4345
self._schema = schema
46+
self._absolute_path = absolute_path
4447
if isinstance(file_ref, (str, os.PathLike)):
4548
self._path = Path(file_ref)
4649
elif isinstance(file_ref, Artifact):
@@ -51,7 +54,10 @@ def __init__(self, file_ref: Union[str, os.PathLike, Artifact], schema = None):
5154
def path(self):
5255
""" Convenience property for accessing path as a string."""
5356
try:
54-
return self._artifact.get_absolute_path()
57+
if self._absolute_path:
58+
return self._artifact.get_absolute_path()
59+
else:
60+
return self._artifact.get_relative_path()
5561
except AttributeError:
5662
return str(self._path)
5763

bids/layout/tests/conftest.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,11 @@ def layout_7t_trt_relpath():
1818
data_dir = join(get_test_data_path(), '7t_trt')
1919
return BIDSLayout(data_dir, absolute_paths=False)
2020

21-
21+
# AD: Manually ignoring derivatives for now
2222
@pytest.fixture(scope="module")
2323
def layout_ds005():
2424
data_dir = join(get_test_data_path(), 'ds005')
25-
return BIDSLayout(data_dir)
25+
return BIDSLayout(data_dir, ignore=['derivatives', 'models'])
2626

2727

2828
@pytest.fixture(scope="module")

0 commit comments

Comments
 (0)