From a2c0be36262b1781f90723da2f6a72457951740e Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 19 Nov 2025 19:43:44 +0000
Subject: [PATCH 01/13] Add detections validator

---
 ethology/io/annotations/validate.py |  75 ++---------------
 ethology/io/detections/validate.py  |  48 +++++++++++
 ethology/io/validate.py             | 121 ++++++++++++++++++++++++++++
 3 files changed, 176 insertions(+), 68 deletions(-)
 create mode 100644 ethology/io/detections/validate.py
 create mode 100644 ethology/io/validate.py

diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py
index 2e00ab92..04b81a60 100644
--- a/ethology/io/annotations/validate.py
+++ b/ethology/io/annotations/validate.py
@@ -1,13 +1,10 @@
 """Validators for annotation files and datasets."""
 
 import json
-from collections.abc import Callable
-from functools import wraps
 from pathlib import Path
 
 import pandas as pd
 import pandera.pandas as pa
-import xarray as xr
 from attrs import define, field
 from pandera.typing import Index
 
@@ -17,6 +14,7 @@
     _check_required_keys_in_dict,
     _get_default_schema,
 )
+from ethology.io.validate import ValidDataset
 
 
 @define
@@ -227,7 +225,7 @@ def _file_contains_unique_image_IDs(self, attribute, value):
 
 
 @define
-class ValidBboxesDataset:
+class ValidBboxAnnotationsDataset(ValidDataset):
     """Class for valid ``ethology`` bounding box annotations datasets.
 
     It checks that the input dataset has:
@@ -239,6 +237,10 @@ class ValidBboxesDataset:
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
+    required_dims : set
+        Set of required dimension names.
+    required_data_vars : set
+        Set of required data variable names.
 
     Raises
     ------
@@ -254,9 +256,7 @@ class ValidBboxesDataset:
 
     """
 
-    dataset: xr.Dataset = field()
-
-    # Minimum requirements for annotations datasets holding bboxes
+    # Minimum requirements for a bbox dataset holding annotations
     required_dims: set = field(
         default={"image_id", "space", "id"},
         init=False,
@@ -266,32 +266,6 @@ class ValidBboxesDataset:
         init=False,
     )
 
-    @dataset.validator
-    def _check_dataset_type(self, attribute, value):
-        """Ensure the input is an xarray Dataset."""
-        if not isinstance(value, xr.Dataset):
-            raise TypeError(
-                f"Expected an xarray Dataset, but got {type(value)}."
-            )
-
-    @dataset.validator
-    def _check_required_data_variables(self, attribute, value):
-        """Ensure the dataset has all required data variables."""
-        missing_vars = self.required_data_vars - set(value.data_vars)
-        if missing_vars:
-            raise ValueError(
-                f"Missing required data variables: {sorted(missing_vars)}"
-            )
-
-    @dataset.validator
-    def _check_required_dimensions(self, attribute, value):
-        """Ensure the dataset has all required dimensions."""
-        missing_dims = self.required_dims - set(value.dims)
-        if missing_dims:
-            raise ValueError(
-                f"Missing required dimensions: {sorted(missing_dims)}"
-            )
-
 
 class ValidBboxesDataFrame(pa.DataFrameModel):
     """Class for valid bounding boxes intermediate dataframes.
@@ -573,38 +547,3 @@ def check_idx_and_annotation_id(cls, df: pd.DataFrame) -> bool:
 
         """
         return all(df.index == df["annotation_id"])
-
-
-def _check_output(validator: type):
-    """Return a decorator that validates the output of a function."""
-
-    def decorator(function: Callable) -> Callable:
-        @wraps(function)  # to preserve function metadata
-        def wrapper(*args, **kwargs):
-            result = function(*args, **kwargs)
-            validator(result)
-            return result
-
-        return wrapper
-
-    return decorator
-
-
-def _check_input(validator: type, input_index: int = 0):
-    """Return a decorator that validates a specific input of a function.
-
-    By default, the first input is validated. If the input index is
-    larger than the number of inputs, no validation is performed.
-    """
-
-    def decorator(function: Callable) -> Callable:
-        @wraps(function)
-        def wrapper(*args, **kwargs):
-            if len(args) > input_index:
-                validator(args[input_index])
-            result = function(*args, **kwargs)
-            return result
-
-        return wrapper
-
-    return decorator
diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py
new file mode 100644
index 00000000..7ef6285d
--- /dev/null
+++ b/ethology/io/detections/validate.py
@@ -0,0 +1,48 @@
+"""Validators for detection datasets."""
+
+from attrs import define, field
+
+from ethology.io.validate import ValidDataset
+
+
+@define
+class ValidBboxDetectionsDataset(ValidDataset):
+    """Class for valid ``ethology`` bounding box detections datasets.
+
+    It checks that the input dataset has:
+
+    - ``image_id``, ``space``, ``id`` as dimensions
+    - ``position``, ``shape`` and ``confidence`` as data variables
+
+    Attributes
+    ----------
+    dataset : xarray.Dataset
+        The xarray dataset to validate.
+    required_dims : set
+        Set of required dimension names.
+    required_data_vars : set
+        Set of required data variable names.
+
+    Raises
+    ------
+    TypeError
+        If the input is not an xarray Dataset.
+    ValueError
+        If the dataset is missing required data variables or dimensions.
+
+    Notes
+    -----
+    The dataset can have other data variables and dimensions, but only the
+    required ones are checked.
+
+    """
+
+    # Minimum requirements for a bbox dataset holding detections
+    required_dims: set = field(
+        default={"image_id", "space", "id"},
+        init=False,
+    )
+    required_data_vars: set = field(
+        default={"position", "shape", "confidence"},
+        init=False,
+    )
diff --git a/ethology/io/validate.py b/ethology/io/validate.py
new file mode 100644
index 00000000..22c215f9
--- /dev/null
+++ b/ethology/io/validate.py
@@ -0,0 +1,121 @@
+"""Utils for validating `ethology` objects."""
+
+from abc import ABC, abstractmethod
+from collections.abc import Callable
+from functools import wraps
+
+import xarray as xr
+from attrs import define, field
+
+
+@define
+class ValidDataset(ABC):
+    """An abstract base class for valid ``ethology`` datasets.
+
+    It checks that the input dataset has:
+
+    - required dimensions
+    - required data variables
+
+    Subclasses must define ``required_dims`` and ``required_data_vars``
+    attributes.
+
+    Attributes
+    ----------
+    dataset : xarray.Dataset
+        The xarray dataset to validate.
+    required_dims : set
+        Set of required dimension names (defined by subclasses).
+    required_data_vars : set
+        Set of required data variable names (defined by subclasses).
+
+    Raises
+    ------
+    TypeError
+        If the input is not an xarray Dataset.
+    ValueError
+        If the dataset is missing required data variables or dimensions.
+
+    Notes
+    -----
+    The dataset can have other data variables and dimensions, but only the
+    required ones are checked.
+
+    """
+
+    dataset: xr.Dataset = field()
+
+    # Subclasses should override these abstract properties
+    @property
+    @abstractmethod
+    def required_dims(self) -> set:
+        """Subclasses must provide a required_dims property."""
+        pass
+
+    @property
+    @abstractmethod
+    def required_data_vars(self) -> set:
+        """Subclasses must provide a required_data_vars property."""
+        pass
+
+    # Validators
+    @dataset.validator
+    def _check_dataset_type(self, attribute, value):
+        """Ensure the input is an xarray Dataset."""
+        if not isinstance(value, xr.Dataset):
+            raise TypeError(
+                f"Expected an xarray Dataset, but got {type(value)}."
+            )
+
+    @dataset.validator
+    def _check_required_data_variables(self, attribute, value):
+        """Ensure the dataset has all required data variables."""
+        missing_vars = self.required_data_vars - set(value.data_vars)
+        if missing_vars:
+            raise ValueError(
+                f"Missing required data variables: {sorted(missing_vars)}"
+            )
+
+    @dataset.validator
+    def _check_required_dimensions(self, attribute, value):
+        """Ensure the dataset has all required dimensions."""
+        missing_dims = self.required_dims - set(value.dims)
+        if missing_dims:
+            raise ValueError(
+                f"Missing required dimensions: {sorted(missing_dims)}"
+            )
+
+
+def _check_output(validator: type):
+    """Return a decorator that validates the output of a function."""
+
+    def decorator(function: Callable) -> Callable:
+        @wraps(function)  # to preserve function metadata
+        def wrapper(*args, **kwargs):
+            result = function(*args, **kwargs)
+            validator(result)
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+def _check_input(validator: type, input_index: int = 0):
+    """Return a decorator that validates a specific input of a function.
+
+    By default, the first input is validated. Only positional inputs are
+    checked: if there is none at ``input_index``, validation is skipped.
+    """
+
+    def decorator(function: Callable) -> Callable:
+        @wraps(function)
+        def wrapper(*args, **kwargs):
+            if len(args) > input_index:
+                validator(args[input_index])
+            result = function(*args, **kwargs)
+            return result
+
+        return wrapper
+
+    return decorator

From 9e4b70fb0a758b448760595678c94ae5b033222c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 15:55:27 +0000
Subject: [PATCH 02/13] Rename validators for consistency

---
 ethology/io/annotations/load_bboxes.py        | 24 ++++++++++--------
 ethology/io/annotations/save_bboxes.py        | 25 ++++++++++---------
 ethology/io/annotations/validate.py           |  4 +--
 .../test_io_annotations/test_save_bboxes.py   |  8 +++---
 .../test_io_annotations/test_validators.py    |  6 ++---
 5 files changed, 35 insertions(+), 32 deletions(-)

diff --git a/ethology/io/annotations/load_bboxes.py b/ethology/io/annotations/load_bboxes.py
index d59d0abc..f70de2d4 100644
--- a/ethology/io/annotations/load_bboxes.py
+++ b/ethology/io/annotations/load_bboxes.py
@@ -11,15 +11,15 @@
 from pandera.typing.pandas import DataFrame
 
 from ethology.io.annotations.validate import (
-    ValidBboxesDataFrame,
-    ValidBboxesDataset,
+    ValidBboxAnnotationsDataFrame,
+    ValidBboxAnnotationsDataset,
     ValidCOCO,
     ValidVIA,
-    _check_output,
 )
+from ethology.io.validate import _check_output
 
 
-@_check_output(ValidBboxesDataset)
+@_check_output(ValidBboxAnnotationsDataset)
 def from_files(
     file_paths: Path | str | list[Path | str],
     format: Literal["VIA", "COCO"],
@@ -138,7 +138,7 @@ def from_files(
 
 
 def _get_map_attributes_from_df(
-    df: DataFrame[ValidBboxesDataFrame],
+    df: DataFrame[ValidBboxAnnotationsDataFrame],
 ) -> tuple[dict, dict]:
     """Get the map attributes from the dataframe.
 
@@ -179,7 +179,7 @@ def _get_map_attributes_from_df(
 @pa.check_types
 def _df_from_multiple_files(
     list_filepaths: list[Path | str], format: Literal["VIA", "COCO"]
-) -> DataFrame[ValidBboxesDataFrame]:
+) -> DataFrame[ValidBboxAnnotationsDataFrame]:
     """Read annotations from multiple files as a valid intermediate dataframe.
 
     Parameters
@@ -242,7 +242,7 @@ def _df_from_multiple_files(
 @pa.check_types
 def _df_from_single_file(
     file_path: Path | str, format: Literal["VIA", "COCO"]
-) -> DataFrame[ValidBboxesDataFrame]:
+) -> DataFrame[ValidBboxAnnotationsDataFrame]:
     """Read annotations from a single file as a valid intermediate dataframe.
 
     Parameters
@@ -374,7 +374,7 @@ def _df_rows_from_valid_VIA_file(file_path: Path) -> list[dict]:
 
             else:
                 supercategory, category, category_id = (
-                    ValidBboxesDataFrame.get_empty_values()[key]
+                    ValidBboxAnnotationsDataFrame.get_empty_values()[key]
                     for key in ["supercategory", "category", "category_id"]
                 )
 
@@ -428,7 +428,7 @@ def _get_image_shape_attr_as_integer(
         ValidBboxesDataFrame.get_empty_values().
 
     """
-    default_value = ValidBboxesDataFrame.get_empty_values()[
+    default_value = ValidBboxAnnotationsDataFrame.get_empty_values()[
         f"image_{attr_name}"
     ]
     try:
@@ -557,7 +557,9 @@ def _df_rows_from_valid_COCO_file(file_path: Path) -> list[dict]:
 
 
 @pa.check_types
-def _df_to_xarray_ds(df: DataFrame[ValidBboxesDataFrame]) -> xr.Dataset:
+def _df_to_xarray_ds(
+    df: DataFrame[ValidBboxAnnotationsDataFrame],
+) -> xr.Dataset:
     """Convert a bounding box annotations dataframe to an xarray dataset.
 
     Parameters
@@ -585,7 +587,7 @@ def _df_to_xarray_ds(df: DataFrame[ValidBboxesDataFrame]) -> xr.Dataset:
 
     """
     # Drop columns if all values in that column are empty
-    default_values = ValidBboxesDataFrame.get_empty_values()
+    default_values = ValidBboxAnnotationsDataFrame.get_empty_values()
     list_empty_cols = [
         col for col in default_values if all(df[col] == default_values[col])
     ]
diff --git a/ethology/io/annotations/save_bboxes.py b/ethology/io/annotations/save_bboxes.py
index bf9e09ef..d79d6ed4 100644
--- a/ethology/io/annotations/save_bboxes.py
+++ b/ethology/io/annotations/save_bboxes.py
@@ -12,16 +12,15 @@
 from pandera.typing.pandas import DataFrame
 
 from ethology.io.annotations.validate import (
-    ValidBboxesDataFrameCOCO,
-    ValidBboxesDataset,
+    ValidBboxAnnotationsCOCO,
+    ValidBboxAnnotationsDataset,
     ValidCOCO,
-    _check_input,
-    _check_output,
 )
+from ethology.io.validate import _check_input, _check_output
 
 
-@_check_input(validator=ValidBboxesDataset)
-@_check_output(validator=ValidCOCO)  # check output is ethology importable
+@_check_input(validator=ValidBboxAnnotationsDataset)
+@_check_output(validator=ValidCOCO)  # check output is ethology-importable
 def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path):
     """Save an ``ethology`` bounding box annotations dataset to a COCO file.
 
@@ -56,11 +55,11 @@ def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path):
     return output_filepath
 
 
-@_check_input(validator=ValidBboxesDataset)
+@_check_input(validator=ValidBboxAnnotationsDataset)
 @pa.check_types
 def _to_COCO_exportable_df(
     ds: xr.Dataset,
-) -> DataFrame[ValidBboxesDataFrameCOCO]:
+) -> DataFrame[ValidBboxAnnotationsCOCO]:
     """Convert dataset of bounding boxes annotations to a COCO-exportable df.
 
     The returned dataframe is validated using ValidBBoxesDataFrameCOCO.
@@ -98,7 +97,7 @@ def _to_COCO_exportable_df(
     return df[cols_to_select]
 
 
-@_check_input(validator=ValidBboxesDataset)
+@_check_input(validator=ValidBboxAnnotationsDataset)
 def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
     """Get preliminary dataframe from a dataset of bounding boxes annotations.
 
@@ -164,7 +163,7 @@ def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
 @pa.check_types
 def _add_COCO_data_to_df(
     df: pd.DataFrame, ds_attrs: dict
-) -> DataFrame[ValidBboxesDataFrameCOCO]:
+) -> DataFrame[ValidBboxAnnotationsCOCO]:
     """Add COCO-required data to preliminary dataframe.
 
     The input dataframe is obtained from a dataset of bounding boxes
@@ -266,7 +265,9 @@ def _add_COCO_data_to_df(
 
 
 @pa.check_types
-def _create_COCO_dict(df: DataFrame[ValidBboxesDataFrameCOCO]) -> dict:
+def _create_COCO_dict(
+    df: DataFrame[ValidBboxAnnotationsCOCO],
+) -> dict:
     """Extract COCO dictionary from a COCO-exportable dataframe.
 
     Parameters
@@ -282,7 +283,7 @@ def _create_COCO_dict(df: DataFrame[ValidBboxesDataFrameCOCO]) -> dict:
     """
     COCO_dict: dict[str, Any] = {}
     map_columns_to_COCO_fields = (
-        ValidBboxesDataFrameCOCO.map_df_columns_to_COCO_fields()
+        ValidBboxAnnotationsCOCO.map_df_columns_to_COCO_fields()
     )
     for sections in ["images", "categories", "annotations"]:
         # Extract and rename required columns for this section
diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py
index 04b81a60..35a82550 100644
--- a/ethology/io/annotations/validate.py
+++ b/ethology/io/annotations/validate.py
@@ -267,7 +267,7 @@ class ValidBboxAnnotationsDataset(ValidDataset):
     )
 
 
-class ValidBboxesDataFrame(pa.DataFrameModel):
+class ValidBboxAnnotationsDataFrame(pa.DataFrameModel):
     """Class for valid bounding boxes intermediate dataframes.
 
     We use this dataframe internally as an intermediate step in the process of
@@ -396,7 +396,7 @@ def get_empty_values() -> dict:
         }
 
 
-class ValidBboxesDataFrameCOCO(pa.DataFrameModel):
+class ValidBboxAnnotationsCOCO(pa.DataFrameModel):
     """Class for COCO-exportable bounding box annotations dataframes.
 
     The validation checks the required columns exist and their types are
diff --git a/tests/test_unit/test_io_annotations/test_save_bboxes.py b/tests/test_unit/test_io_annotations/test_save_bboxes.py
index fb32e978..1cab673c 100644
--- a/tests/test_unit/test_io_annotations/test_save_bboxes.py
+++ b/tests/test_unit/test_io_annotations/test_save_bboxes.py
@@ -16,7 +16,7 @@
     _get_raw_df_from_ds,
     to_COCO_file,
 )
-from ethology.io.annotations.validate import ValidBboxesDataFrameCOCO
+from ethology.io.annotations.validate import ValidBboxAnnotationsCOCO
 
 
 def read_JSON_as_dict(file_path: str | Path) -> dict:
@@ -146,7 +146,7 @@ def _sample_bboxes_df_drop(
         ).set_index("annotation_id", drop=False)
 
         # Validate as COCO-exportable
-        df = ValidBboxesDataFrameCOCO.validate(df)
+        df = ValidBboxAnnotationsCOCO.validate(df)
 
         # Drop columns if specified
         if columns_to_drop:
@@ -216,7 +216,7 @@ def test_validate_bboxes_df_COCO(
         df_factory = request.getfixturevalue(df)
         df = df_factory()
     with expected_exception as excinfo:
-        ValidBboxesDataFrameCOCO(df)
+        ValidBboxAnnotationsCOCO(df)
     if excinfo:
         assert expected_error_message in str(excinfo.value)
 
@@ -366,7 +366,7 @@ def test_create_COCO_dict(sample_bboxes_df: Callable):
 
     # Check keys in each section
     map_df_columns_to_coco = copy.deepcopy(
-        ValidBboxesDataFrameCOCO.map_df_columns_to_COCO_fields()
+        ValidBboxAnnotationsCOCO.map_df_columns_to_COCO_fields()
     )
     for section, section_mapping in map_df_columns_to_coco.items():
         assert all(
diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_io_annotations/test_validators.py
index d054da27..84de5793 100644
--- a/tests/test_unit/test_io_annotations/test_validators.py
+++ b/tests/test_unit/test_io_annotations/test_validators.py
@@ -11,7 +11,7 @@
     _extract_properties_keys,
 )
 from ethology.io.annotations.validate import (
-    ValidBboxesDataset,
+    ValidBboxAnnotationsDataset,
     ValidCOCO,
     ValidVIA,
 )
@@ -557,7 +557,7 @@ def test_valid_bboxes_dataset_validation(
     expected_error_message: str,
     request: pytest.FixtureRequest,
 ):
-    """Test ValidBboxesDataset validation with various input scenarios."""
+    """Test bbox annotations dataset validation in various input scenarios."""
     # Get dataset to validate
     if isinstance(sample_dataset, str):
         dataset = request.getfixturevalue(sample_dataset)
@@ -566,7 +566,7 @@ def test_valid_bboxes_dataset_validation(
 
     # Run validation and check exception
     with expected_exception as excinfo:
-        validator = ValidBboxesDataset(dataset=dataset)
+        validator = ValidBboxAnnotationsDataset(dataset=dataset)
 
     if excinfo:
         error_msg = str(excinfo.value)

From 9c68fa4f5a05b55934550938bdeed1f7c277c482 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 16:28:29 +0000
Subject: [PATCH 03/13] Rename fixtures and test to highlight annotations vs
 detection datasets. Add test for bbox detections dataset validation

---
 tests/fixtures/annotations.py                 |  19 +-
 tests/test_unit/test_datasets/test_split.py   |  48 ++---
 .../test_io_annotations/test_validators.py    |  20 +-
 .../test_io_detections/test_validators.py     | 183 ++++++++++++++++++
 4 files changed, 229 insertions(+), 41 deletions(-)
 create mode 100644 tests/test_unit/test_io_detections/test_validators.py

diff --git a/tests/fixtures/annotations.py b/tests/fixtures/annotations.py
index 5fd95a8f..fd8efb35 100644
--- a/tests/fixtures/annotations.py
+++ b/tests/fixtures/annotations.py
@@ -135,8 +135,8 @@ def default_COCO_schema() -> dict:
 
 # ----------------- Bboxes dataset validation fixtures -----------------
 @pytest.fixture
-def valid_bboxes_dataset():
-    """Create a valid xarray dataset for bboxes validation."""
+def valid_bbox_annotations_dataset():
+    """Create a valid bbox annotations dataset for validation."""
     image_ids = [1, 2, 3]
     annotation_ids = [0, 1, 2]  # three per frame
     space_dims = ["x", "y"]
@@ -145,13 +145,13 @@ def valid_bboxes_dataset():
     position_data = np.zeros(
         (len(image_ids), len(space_dims), len(annotation_ids))
     )
-    shape_data = np.zeros((len(image_ids), len(annotation_ids)))
+    shape_data = np.copy(position_data)
 
     # Create the dataset
     ds = xr.Dataset(
         data_vars={
             "position": (["image_id", "space", "id"], position_data),
-            "shape": (["image_id", "id"], shape_data),
+            "shape": (["image_id", "space", "id"], shape_data),
         },
         coords={
             "image_id": image_ids,
@@ -164,10 +164,15 @@ def valid_bboxes_dataset():
 
 
 @pytest.fixture
-def valid_bboxes_dataset_extra_vars_and_dims(
-    valid_bboxes_dataset: xr.Dataset,
+def valid_bbox_annotations_dataset_extra_vars_and_dims(
+    valid_bbox_annotations_dataset: xr.Dataset,
 ) -> xr.Dataset:
-    ds = valid_bboxes_dataset.copy(deep=True)
+    """Create a valid bbox annotations dataset for validation.
+
+    The dataset is valid but contains more variables and dimensions than
+    the minimum required for a bbox annotations dataset.
+    """
+    ds = valid_bbox_annotations_dataset.copy(deep=True)
     ds.coords["extra_dim"] = [10, 20, 30]
     ds["extra_var_1"] = (["image_id"], np.random.rand(len(ds.image_id)))
     ds["extra_var_2"] = (["id"], np.random.rand(len(ds.id)))
diff --git a/tests/test_unit/test_datasets/test_split.py b/tests/test_unit/test_datasets/test_split.py
index c9846262..539b448b 100644
--- a/tests/test_unit/test_datasets/test_split.py
+++ b/tests/test_unit/test_datasets/test_split.py
@@ -23,12 +23,12 @@ def split_at_any_delimiter(text: str, delimiters: list[str]) -> list[str]:
 
 
 @pytest.fixture
-def valid_bboxes_dataset_to_split_1(valid_bboxes_dataset):
+def valid_bbox_annotations_ds_to_split_1(valid_bbox_annotations_dataset):
     # We add a `foo` variable to the dataset that is
     # one-dimensional along the `image_id` dimension to
     # use for grouping by.
     # Note: len(valid_bboxes_dataset.image_id) = 3
-    ds = valid_bboxes_dataset.copy(deep=True)
+    ds = valid_bbox_annotations_dataset.copy(deep=True)
     ds["foo"] = (
         ["image_id"],
         np.array([0, 1, 1]),
@@ -37,14 +37,14 @@ def valid_bboxes_dataset_to_split_1(valid_bboxes_dataset):
 
 
 @pytest.fixture
-def valid_bboxes_dataset_to_split_2(valid_bboxes_dataset):
+def valid_bbox_annotations_ds_to_split_2(valid_bbox_annotations_dataset):
     # We add a `foo` variable to the dataset that is
     # one-dimensional along the `image_id` dimension to
     # use for grouping by. In this case we ensure we have
     # 3 groups to be able to split using 3 folds (with
     # GroupKFold we cannot have more folds than groups).
     # Note: len(valid_bboxes_dataset.image_id) = 3
-    ds = valid_bboxes_dataset.copy(deep=True)
+    ds = valid_bbox_annotations_dataset.copy(deep=True)
     ds["foo"] = (
         ["image_id"],
         np.array([0, 1, 2]),
@@ -150,12 +150,12 @@ def test_approximate_subset_sum(inputs, expected_subset_dict):
     "inputs",
     [
         {
-            "dataset": "valid_bboxes_dataset_to_split_1",
+            "dataset": "valid_bbox_annotations_ds_to_split_1",
             "list_fractions": [0.334, 0.666],
             "samples_coordinate": "image_id",
         },  # fractions in increasing order
         {
-            "dataset": "valid_bboxes_dataset_to_split_1",
+            "dataset": "valid_bbox_annotations_ds_to_split_1",
             "list_fractions": [0.666, 0.334],
             "samples_coordinate": "image_id",
         },  # fractions in decreasing order
@@ -207,12 +207,12 @@ def test_split_dataset_group_by_apss(inputs, request):
     "inputs",
     [
         {
-            "dataset": "valid_bboxes_dataset_to_split_2",
+            "dataset": "valid_bbox_annotations_ds_to_split_2",
             "list_fractions": [0.334, 0.666],
             "samples_coordinate": "image_id",
         },  # fractions in increasing order
         {
-            "dataset": "valid_bboxes_dataset_to_split_2",
+            "dataset": "valid_bbox_annotations_ds_to_split_2",
             "list_fractions": [0.666, 0.334],
             "samples_coordinate": "image_id",
         },  # fractions in decreasing order
@@ -258,10 +258,10 @@ def test_split_dataset_group_by_kfold(inputs, request):
     )
 
 
-def test_split_dataset_group_by_kfold_seed(valid_bboxes_dataset_to_split_2):
+def test_split_dataset_group_by_kfold_seed(valid_bbox_annotations_ds_to_split_2):
     """Test the behaviour of the seed when using the `kfold` method."""
     # prepare inputs
-    dataset = valid_bboxes_dataset_to_split_2
+    dataset = valid_bbox_annotations_ds_to_split_2
     list_fractions = [0.334, 0.666]
     samples_coordinate = "image_id"
     group_by_var = "foo"
@@ -313,7 +313,7 @@ def test_split_dataset_group_by_kfold_seed(valid_bboxes_dataset_to_split_2):
     ],
 )
 def test_split_dataset_group_by(
-    method, function_to_mock, valid_bboxes_dataset_to_split_1
+    method, function_to_mock, valid_bbox_annotations_ds_to_split_1
 ):
     """Test the wrapper function dispatches to the appropriate method."""
     # Create mock return datasets
@@ -322,7 +322,7 @@ def test_split_dataset_group_by(
     # Patch the internal function and call the wrapper
     with patch(function_to_mock, return_value=mock_return_value) as mock:
         _ds_subset_1, _ds_subset_2 = split_dataset_group_by(
-            dataset=valid_bboxes_dataset_to_split_1,
+            dataset=valid_bbox_annotations_ds_to_split_1,
             group_by_var="foo",
             list_fractions=[0.334, 0.666],
             samples_coordinate="image_id",
@@ -336,8 +336,8 @@ def test_split_dataset_group_by(
 @pytest.mark.parametrize(
     "dataset, expected_method",
     [
-        ("valid_bboxes_dataset_to_split_1", "apss"),
-        ("valid_bboxes_dataset_to_split_2", "kfold"),
+        ("valid_bbox_annotations_ds_to_split_1", "apss"),
+        ("valid_bbox_annotations_ds_to_split_2", "kfold"),
     ],
 )
 def test_split_dataset_group_by_auto(dataset, expected_method, request):
@@ -365,12 +365,12 @@ def test_split_dataset_group_by_auto(dataset, expected_method, request):
 
 
 def test_split_dataset_group_by_unknown_method(
-    valid_bboxes_dataset_to_split_1,
+    valid_bbox_annotations_ds_to_split_1,
 ):
     """Test that an unknown method raises a ValueError."""
     with pytest.raises(ValueError, match="Unknown method"):
         split_dataset_group_by(
-            dataset=valid_bboxes_dataset_to_split_1,
+            dataset=valid_bbox_annotations_ds_to_split_1,
             group_by_var="foo",
             list_fractions=[0.5, 0.5],
             method="unknown_method",
@@ -381,17 +381,17 @@ def test_split_dataset_group_by_unknown_method(
     "inputs",
     [
         {
-            "dataset": "valid_bboxes_dataset_to_split_1",
+            "dataset": "valid_bbox_annotations_ds_to_split_1",
             "list_fractions": [0.334, 0.666],
             "samples_coordinate": "image_id",
         },  # fractions in increasing order
         {
-            "dataset": "valid_bboxes_dataset_to_split_1",
+            "dataset": "valid_bbox_annotations_ds_to_split_1",
             "list_fractions": [0.666, 0.334],
             "samples_coordinate": "image_id",
         },  # fractions in decreasing order
         {
-            "dataset": "valid_bboxes_dataset_to_split_1",
+            "dataset": "valid_bbox_annotations_ds_to_split_1",
             "list_fractions": [1 / 3, 1 / 3, 1 / 3],
             "samples_coordinate": "image_id",
         },  # more than two fractions
@@ -444,26 +444,26 @@ def test_split_dataset_random(inputs, request):
     [
         (
             "auto",
-            "valid_bboxes_dataset_to_split_1",
+            "valid_bbox_annotations_ds_to_split_1",
             # dataset that will trigger auto-selection of apss
             # with the requested fractions 0.334 and 0.666
             "Auto-selected approximate subset-sum method",
         ),
         (
             "auto",
-            "valid_bboxes_dataset_to_split_2",
+            "valid_bbox_annotations_ds_to_split_2",
             # dataset with 3 groups so kfold method can be used
             "Using group k-fold method with",
         ),
         (
             "kfold",
-            "valid_bboxes_dataset_to_split_2",
+            "valid_bbox_annotations_ds_to_split_2",
             # dataset with 3 groups so kfold method can be used
             "Using group k-fold method with",
         ),
         (
             "apss",
-            "valid_bboxes_dataset_to_split_2",
+            "valid_bbox_annotations_ds_to_split_2",
             # dataset with 3 groups so apss method can be used
             "Using approximate subset-sum method with",
         ),
@@ -639,7 +639,7 @@ def test_split_dataset_warning_empty_subset(
 ):
     """Test that a warning is thrown when at least one subset is empty."""
     # Get dataset to split
-    ds = request.getfixturevalue("valid_bboxes_dataset_to_split_1")
+    ds = request.getfixturevalue("valid_bbox_annotations_ds_to_split_1")
     inputs["dataset"] = ds
 
     # We use fractions that will cause an empty subset
diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_io_annotations/test_validators.py
index 84de5793..823ea003 100644
--- a/tests/test_unit/test_io_annotations/test_validators.py
+++ b/tests/test_unit/test_io_annotations/test_validators.py
@@ -460,12 +460,12 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
     "sample_dataset, expected_exception, expected_error_message",
     [
         (
-            "valid_bboxes_dataset",
+            "valid_bbox_annotations_dataset",
             does_not_raise(),
             "",
         ),
         (
-            "valid_bboxes_dataset_extra_vars_and_dims",
+            "valid_bbox_annotations_dataset_extra_vars_and_dims",
             does_not_raise(),
             "",
         ),
@@ -542,16 +542,16 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
         ),
     ],
     ids=[
-        "valid_bboxes_dataset",
-        "valid_bboxes_dataset_extra_vars_and_dims",
-        "invalid_bboxes_dataset_type",
-        "invalid_bboxes_dataset_missing_data_var",
-        "invalid_bboxes_dataset_missing_multiple_data_vars",
-        "invalid_bboxes_dataset_missing_dimension",
-        "invalid_bboxes_dataset_missing_multiple_dimensions",
+        "valid_bbox_annotations_dataset",
+        "valid_bbox_annotations_dataset_extra_vars_and_dims",
+        "invalid_bbox_annotations_dataset_type",
+        "invalid_bbox_annotations_dataset_missing_data_var",
+        "invalid_bbox_annotations_dataset_missing_multiple_data_vars",
+        "invalid_bbox_annotations_dataset_missing_dimension",
+        "invalid_bbox_annotations_dataset_missing_multiple_dimensions",
     ],
 )
-def test_valid_bboxes_dataset_validation(
+def test_validator_bbox_annotations_dataset(
     sample_dataset: str | dict,
     expected_exception: pytest.raises,
     expected_error_message: str,
diff --git a/tests/test_unit/test_io_detections/test_validators.py b/tests/test_unit/test_io_detections/test_validators.py
new file mode 100644
index 00000000..2878696a
--- /dev/null
+++ b/tests/test_unit/test_io_detections/test_validators.py
@@ -0,0 +1,183 @@
+from contextlib import nullcontext as does_not_raise
+
+import numpy as np
+import pytest
+import xarray as xr
+
+from ethology.io.detections.validate import ValidBboxDetectionsDataset
+
+
+@pytest.fixture
+def valid_bbox_detections_dataset():
+    """Create a valid bbox detections dataset for validation."""
+    image_ids = [1, 2, 3]
+    annotation_ids = [0, 1, 2]  # max 3 bboxes per frame
+    space_dims = ["x", "y"]
+
+    # Create all-zero position, shape and confidence arrays
+    position_data = np.zeros(
+        (len(image_ids), len(space_dims), len(annotation_ids))
+    )
+    shape_data = np.copy(position_data)
+    confidence_data = np.zeros((len(image_ids), len(annotation_ids)))
+
+    # Create the dataset
+    ds = xr.Dataset(
+        data_vars={
+            "position": (["image_id", "space", "id"], position_data),
+            "shape": (["image_id", "space", "id"], shape_data),
+            "confidence": (["image_id", "id"], confidence_data),
+        },
+        coords={
+            "image_id": image_ids,
+            "space": ["x", "y"],
+            "id": annotation_ids,
+        },
+    )
+
+    return ds
+
+
+@pytest.fixture
+def valid_bbox_detections_dataset_extra_vars_and_dims(
+    valid_bbox_detections_dataset: xr.Dataset,
+) -> xr.Dataset:
+    ds = valid_bbox_detections_dataset.copy(deep=True)
+    ds.coords["extra_dim"] = [10, 20, 30]
+    ds["extra_var_1"] = (["image_id"], np.random.rand(len(ds.image_id)))
+    ds["extra_var_2"] = (["id"], np.random.rand(len(ds.id)))
+    return ds
+
+
+@pytest.mark.parametrize(
+    "sample_dataset, expected_exception, expected_error_message",
+    [
+        (
+            "valid_bbox_detections_dataset",
+            does_not_raise(),
+            "",
+        ),
+        (
+            "valid_bbox_detections_dataset_extra_vars_and_dims",
+            does_not_raise(),
+            "",
+        ),
+        (
+            {"position": [1, 2, 3], "shape": [4, 5, 6]},
+            pytest.raises(TypeError),
+            "Expected an xarray Dataset, but got <class 'dict'>.",
+        ),
+        (
+            xr.Dataset(
+                coords={
+                    "image_id": np.arange(3),
+                    "space": ["x", "y"],
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                },
+            ),
+            pytest.raises(ValueError),
+            "Missing required data variables: ['confidence']",
+        ),
+        (
+            xr.Dataset(
+                coords={
+                    "image_id": np.arange(3),
+                    "space": ["x", "y"],
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                },
+            ),
+            pytest.raises(ValueError),
+            "Missing required data variables: ['confidence', 'shape']",
+        ),
+        (
+            xr.Dataset(
+                coords={"image_id": np.arange(3), "id": np.arange(2)},
+                data_vars={
+                    "position": (["image_id", "id"], np.zeros((3, 2))),
+                    "shape": (["image_id", "id"], np.zeros((3, 2))),
+                    "confidence": (["image_id", "id"], np.zeros((3, 2))),
+                },
+            ),
+            pytest.raises(ValueError),
+            "Missing required dimensions: ['space']",
+        ),
+        (
+            xr.Dataset(
+                coords={
+                    "foo": np.arange(3),
+                    "bar": ["x", "y"],
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["foo", "bar", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["foo", "bar", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "confidence": (
+                        ["foo", "id"],
+                        np.zeros((3, 2)),
+                    ),
+                },
+            ),
+            pytest.raises(ValueError),
+            "Missing required dimensions: ['image_id', 'space']",
+        ),
+    ],
+    ids=[
+        "valid_bbox_detections_dataset",
+        "valid_bbox_detections_dataset_extra_vars_and_dims",
+        "invalid_bbox_detections_dataset_type",
+        "invalid_bbox_detections_dataset_missing_data_var",
+        "invalid_bbox_detections_missing_multiple_data_vars",
+        "invalid_bbox_detections_missing_dimension",
+        "invalid_bbox_detections_missing_multiple_dimensions",
+    ],
+)
+def test_validator_bbox_detections_dataset(
+    sample_dataset: str | dict,
+    expected_exception: pytest.raises,
+    expected_error_message: str,
+    request: pytest.FixtureRequest,
+):
+    """Test bbox detections dataset validation in various input scenarios."""
+    # Get dataset to validate
+    if isinstance(sample_dataset, str):
+        dataset = request.getfixturevalue(sample_dataset)
+    else:
+        dataset = sample_dataset
+
+    # Run validation and check exception
+    with expected_exception as excinfo:
+        validator = ValidBboxDetectionsDataset(dataset=dataset)
+
+    if excinfo:
+        error_msg = str(excinfo.value)
+        assert error_msg in expected_error_message
+    else:
+        assert validator.dataset is dataset
+        assert validator.required_dims == {"image_id", "space", "id"}
+        assert validator.required_data_vars == {
+            "confidence",
+            "position",
+            "shape",
+        }

From 78eadb93e68fc288acc0784739dbe95d0a981364 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 20 Nov 2025 16:29:06 +0000
Subject: [PATCH 04/13] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 tests/test_unit/test_datasets/test_split.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tests/test_unit/test_datasets/test_split.py b/tests/test_unit/test_datasets/test_split.py
index 539b448b..23674cdf 100644
--- a/tests/test_unit/test_datasets/test_split.py
+++ b/tests/test_unit/test_datasets/test_split.py
@@ -258,7 +258,9 @@ def test_split_dataset_group_by_kfold(inputs, request):
     )
 
 
-def test_split_dataset_group_by_kfold_seed(valid_bbox_annotations_ds_to_split_2):
+def test_split_dataset_group_by_kfold_seed(
+    valid_bbox_annotations_ds_to_split_2,
+):
     """Test the behaviour of the seed when using the `kfold` method."""
     # prepare inputs
     dataset = valid_bbox_annotations_ds_to_split_2

From ccc9ca945ce8c76fa72d456a97b6dc8c651bf02d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:15:03 +0000
Subject: [PATCH 05/13] Extend validator to check minimum dimensions per data
 variable too. Extend tests

---
 ethology/io/annotations/validate.py           | 11 +--
 ethology/io/detections/validate.py            | 12 ++--
 ethology/io/validate.py                       | 20 +++++-
 .../test_io_annotations/test_validators.py    | 66 +++++++++++++++---
 .../test_io_detections/test_validators.py     | 67 +++++++++++++++++--
 5 files changed, 152 insertions(+), 24 deletions(-)

diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py
index 35a82550..87229879 100644
--- a/ethology/io/annotations/validate.py
+++ b/ethology/io/annotations/validate.py
@@ -239,8 +239,8 @@ class ValidBboxAnnotationsDataset(ValidDataset):
         The xarray dataset to validate.
     required_dims : set
         Set of required dimension names.
-    required_data_vars : set
-        Set of required data variable names.
+    required_data_vars : dict[str, set]
+        A dictionary mapping data variable names to their required dimensions.
 
     Raises
     ------
@@ -261,8 +261,11 @@ class ValidBboxAnnotationsDataset(ValidDataset):
         default={"image_id", "space", "id"},
         init=False,
     )
-    required_data_vars: set = field(
-        default={"position", "shape"},
+    required_data_vars: dict = field(
+        default={
+            "position": {"image_id", "space", "id"},
+            "shape": {"image_id", "space", "id"},
+        },
         init=False,
     )
 
diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py
index 7ef6285d..ceff454b 100644
--- a/ethology/io/detections/validate.py
+++ b/ethology/io/detections/validate.py
@@ -20,8 +20,8 @@ class ValidBboxDetectionsDataset(ValidDataset):
         The xarray dataset to validate.
     required_dims : set
         Set of required dimension names.
-    required_data_vars : set
-        Set of required data variable names.
+    required_data_vars : dict[str, set]
+        A dictionary mapping data variable names to their required dimensions.
 
     Raises
     ------
@@ -42,7 +42,11 @@ class ValidBboxDetectionsDataset(ValidDataset):
         default={"image_id", "space", "id"},
         init=False,
     )
-    required_data_vars: set = field(
-        default={"position", "shape", "confidence"},
+    required_data_vars: dict = field(
+        default={
+            "position": {"image_id", "space", "id"},
+            "shape": {"image_id", "space", "id"},
+            "confidence": {"image_id", "id"},
+        },
         init=False,
     )
diff --git a/ethology/io/validate.py b/ethology/io/validate.py
index 22c215f9..7c981dce 100644
--- a/ethology/io/validate.py
+++ b/ethology/io/validate.py
@@ -54,7 +54,7 @@ def required_dims(self) -> set:
 
     @property
     @abstractmethod
-    def required_data_vars(self) -> set:
+    def required_data_vars(self) -> dict[str, set]:
         """Subclasses must provide a required_data_vars property."""
         pass
 
@@ -70,7 +70,7 @@ def _check_dataset_type(self, attribute, value):
     @dataset.validator
     def _check_required_data_variables(self, attribute, value):
         """Ensure the dataset has all required data variables."""
-        missing_vars = self.required_data_vars - set(value.data_vars)
+        missing_vars = self.required_data_vars.keys() - set(value.data_vars)
         if missing_vars:
             raise ValueError(
                 f"Missing required data variables: {sorted(missing_vars)}"
@@ -85,6 +85,22 @@ def _check_required_dimensions(self, attribute, value):
                 f"Missing required dimensions: {sorted(missing_dims)}"
             )
 
+    @dataset.validator
+    def _check_dimensions_per_data_variable(self, attribute, value):
+        """Ensure the dataset has all required dimensions."""
+        for (
+            data_var,
+            required_dims_in_data_var,
+        ) in self.required_data_vars.items():
+            missing_dims = required_dims_in_data_var - set(
+                value.data_vars[data_var].coords
+            )
+            if missing_dims:
+                raise ValueError(
+                    f"Missing required dimensions ({sorted(missing_dims)}) "
+                    f"in data variable '{data_var}'."
+                )
+
 
 def _check_output(validator: type):
     """Return a decorator that validates the output of a function."""
diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_io_annotations/test_validators.py
index 823ea003..553514ab 100644
--- a/tests/test_unit/test_io_annotations/test_validators.py
+++ b/tests/test_unit/test_io_annotations/test_validators.py
@@ -469,6 +469,27 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
             does_not_raise(),
             "",
         ),
+        (
+            xr.Dataset(
+                coords={
+                    "image_id": np.arange(3),
+                    "space": np.arange(2),
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["image_id", "space", "id", "foo"],
+                        np.zeros((3, 2, 2, 1)),
+                    ),
+                },
+            ),
+            does_not_raise(),
+            "",
+        ),
         (
             {"position": [1, 2, 3], "shape": [4, 5, 6]},
             pytest.raises(TypeError),
@@ -540,15 +561,41 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
             pytest.raises(ValueError),
             "Missing required dimensions: ['image_id', 'space']",
         ),
+        (
+            xr.Dataset(
+                coords={
+                    "image_id": np.arange(3),
+                    "space": np.arange(2),
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["image_id", "id"],
+                        np.zeros((3, 2)),
+                    ),
+                },
+            ),
+            pytest.raises(ValueError),
+            (
+                "Missing required dimensions (['space']) "
+                "in data variable 'shape'."
+            ),
+        ),
     ],
     ids=[
-        "valid_bbox_annotations_dataset",
-        "valid_bbox_annotations_dataset_extra_vars_and_dims",
-        "invalid_bbox_annotations_dataset_type",
-        "invalid_bbox_annotations_dataset_missing_data_var",
-        "invalid_bbox_annotations_dataset_missing_multiple_data_vars",
-        "invalid_bbox_annotations_dataset_missing_dimension",
-        "invalid_bbox_annotations_dataset_missing_multiple_dimensions",
+        "valid_bbox_annotations",
+        "valid_bbox_annotations_extra_vars_and_dims",
+        "valid_bbox_detections_extra_dims_in_shape_var",
+        "invalid_bbox_annotations_type",
+        "invalid_bbox_annotations_missing_data_var",
+        "invalid_bbox_annotations_missing_multiple_data_vars",
+        "invalid_bbox_annotations_missing_dimension",
+        "invalid_bbox_annotations_missing_multiple_dimensions",
+        "invalid_bbox_annotations_missing_dimension_in_data_var",
     ],
 )
 def test_validator_bbox_annotations_dataset(
@@ -574,4 +621,7 @@ def test_validator_bbox_annotations_dataset(
     else:
         assert validator.dataset is dataset
         assert validator.required_dims == {"image_id", "space", "id"}
-        assert validator.required_data_vars == {"position", "shape"}
+        assert validator.required_data_vars == {
+            "position": {"id", "image_id", "space"},
+            "shape": {"id", "image_id", "space"},
+        }
diff --git a/tests/test_unit/test_io_detections/test_validators.py b/tests/test_unit/test_io_detections/test_validators.py
index 2878696a..b26834aa 100644
--- a/tests/test_unit/test_io_detections/test_validators.py
+++ b/tests/test_unit/test_io_detections/test_validators.py
@@ -62,6 +62,31 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
             does_not_raise(),
             "",
         ),
+        (
+            xr.Dataset(
+                coords={
+                    "image_id": np.arange(3),
+                    "space": np.arange(2),
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["image_id", "space", "id", "foo"],
+                        np.zeros((3, 2, 2, 1)),
+                    ),
+                    "confidence": (
+                        ["image_id", "id"],
+                        np.zeros((3, 2)),
+                    ),
+                },
+            ),
+            does_not_raise(),
+            "",
+        ),
         (
             {"position": [1, 2, 3], "shape": [4, 5, 6]},
             pytest.raises(TypeError),
@@ -142,15 +167,45 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
             pytest.raises(ValueError),
             "Missing required dimensions: ['image_id', 'space']",
         ),
+        (
+            xr.Dataset(
+                coords={
+                    "image_id": np.arange(3),
+                    "space": np.arange(2),
+                    "id": np.arange(2),
+                },
+                data_vars={
+                    "position": (
+                        ["image_id", "space", "id"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["image_id", "id"],
+                        np.zeros((3, 2)),
+                    ),
+                    "confidence": (
+                        ["image_id", "id"],
+                        np.zeros((3, 2)),
+                    ),
+                },
+            ),
+            pytest.raises(ValueError),
+            (
+                "Missing required dimensions (['space']) "
+                "in data variable 'shape'."
+            ),
+        ),
     ],
     ids=[
-        "valid_bbox_detections_dataset",
-        "valid_bbox_detections_dataset_extra_vars_and_dims",
-        "invalid_bbox_detections_dataset_type",
+        "valid_bbox_detections",
+        "valid_bbox_detections_extra_vars_and_dims",
+        "valid_bbox_detections_extra_dims_in_shape_var",
+        "invalid_bbox_detections_type",
         "invalid_bbox_detections_dataset_missing_data_var",
         "invalid_bbox_detections_missing_multiple_data_vars",
         "invalid_bbox_detections_missing_dimension",
         "invalid_bbox_detections_missing_multiple_dimensions",
+        "invalid_bbox_detections_missing_dimension_in_data_var",
     ],
 )
 def test_validator_bbox_detections_dataset(
@@ -177,7 +232,7 @@ def test_validator_bbox_detections_dataset(
         assert validator.dataset is dataset
         assert validator.required_dims == {"image_id", "space", "id"}
         assert validator.required_data_vars == {
-            "confidence",
-            "position",
-            "shape",
+            "position": {"image_id", "space", "id"},
+            "shape": {"image_id", "space", "id"},
+            "confidence": {"image_id", "id"},
         }

From 575c56b434fae05a811d5a26af3d796bc4ef740f Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:27:45 +0000
Subject: [PATCH 06/13] Improve error message

---
 ethology/io/validate.py                       | 20 +++++++++++--------
 .../test_io_annotations/test_validators.py    |  4 ++--
 .../test_io_detections/test_validators.py     |  4 ++--
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/ethology/io/validate.py b/ethology/io/validate.py
index 7c981dce..d82d2378 100644
--- a/ethology/io/validate.py
+++ b/ethology/io/validate.py
@@ -88,19 +88,23 @@ def _check_required_dimensions(self, attribute, value):
     @dataset.validator
     def _check_dimensions_per_data_variable(self, attribute, value):
         """Ensure the dataset has all required dimensions."""
-        for (
-            data_var,
-            required_dims_in_data_var,
-        ) in self.required_data_vars.items():
-            missing_dims = required_dims_in_data_var - set(
+        error_messages = []
+        for data_var, dims_per_data_var in self.required_data_vars.items():
+            missing_dims = dims_per_data_var - set(
                 value.data_vars[data_var].coords
             )
             if missing_dims:
-                raise ValueError(
-                    f"Missing required dimensions ({sorted(missing_dims)}) "
-                    f"in data variable '{data_var}'."
+                error_messages.append(
+                    f"data variable '{data_var}' is missing "
+                    f"dimensions {sorted(missing_dims)}"
                 )
 
+        if error_messages:
+            raise ValueError(
+                "Some data variables are missing required dimensions:\n  - "
+                + "\n  - ".join(error_messages)
+            )
+
 
 def _check_output(validator: type):
     """Return a decorator that validates the output of a function."""
diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_io_annotations/test_validators.py
index 553514ab..599f942a 100644
--- a/tests/test_unit/test_io_annotations/test_validators.py
+++ b/tests/test_unit/test_io_annotations/test_validators.py
@@ -581,8 +581,8 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
             ),
             pytest.raises(ValueError),
             (
-                "Missing required dimensions (['space']) "
-                "in data variable 'shape'."
+                "Some data variables are missing required dimensions:"
+                "\n  - data variable 'shape' is missing dimensions ['space']"
             ),
         ),
     ],
diff --git a/tests/test_unit/test_io_detections/test_validators.py b/tests/test_unit/test_io_detections/test_validators.py
index b26834aa..aa4abf9b 100644
--- a/tests/test_unit/test_io_detections/test_validators.py
+++ b/tests/test_unit/test_io_detections/test_validators.py
@@ -191,8 +191,8 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
             ),
             pytest.raises(ValueError),
             (
-                "Missing required dimensions (['space']) "
-                "in data variable 'shape'."
+                "Some data variables are missing required dimensions:"
+                "\n  - data variable 'shape' is missing dimensions ['space']"
             ),
         ),
     ],

From bbb9c5b00d0a372460658bed313307a277001289 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:30:13 +0000
Subject: [PATCH 07/13] Ignore code cov in abstract base class

---
 ethology/io/validate.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ethology/io/validate.py b/ethology/io/validate.py
index d82d2378..4739f32d 100644
--- a/ethology/io/validate.py
+++ b/ethology/io/validate.py
@@ -50,13 +50,13 @@ class ValidDataset(ABC):
     @abstractmethod
     def required_dims(self) -> set:
         """Subclasses must provide a required_dims property."""
-        pass
+        pass  # pragma: no cover
 
     @property
     @abstractmethod
     def required_data_vars(self) -> dict[str, set]:
         """Subclasses must provide a required_data_vars property."""
-        pass
+        pass  # pragma: no cover
 
     # Validators
     @dataset.validator

From d0f8991fef7efbbd2dac6b4322a06e427bfe68dd Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:55:19 +0000
Subject: [PATCH 08/13] Factor out validators module

---
 ethology/io/annotations/load_bboxes.py        |   4 +-
 ethology/io/annotations/save_bboxes.py        |   4 +-
 .../validate.py => validators/annotations.py} |   4 +-
 .../validate.py => validators/detections.py}  |   2 +-
 .../json_schemas/__init__.py                  |   0
 .../json_schemas/schemas/COCO_schema.json     |   0
 .../json_schemas/schemas/README.md            |   0
 .../json_schemas/schemas/VIA_schema.json      |   0
 .../json_schemas/utils.py                     |   0
 .../{io/validate.py => validators/utils.py}   |   0
 tests/fixtures/annotations.py                 |  16 --
 .../test_io_annotations/test_save_bboxes.py   |   2 +-
 .../test_annotations.py}                      | 249 +---------------
 .../test_detections.py}                       |   2 +-
 .../test_validators/test_json_schemas.py      | 268 ++++++++++++++++++
 15 files changed, 278 insertions(+), 273 deletions(-)
 rename ethology/{io/annotations/validate.py => validators/annotations.py} (99%)
 rename ethology/{io/detections/validate.py => validators/detections.py} (96%)
 rename ethology/{io/annotations => validators}/json_schemas/__init__.py (100%)
 rename ethology/{io/annotations => validators}/json_schemas/schemas/COCO_schema.json (100%)
 rename ethology/{io/annotations => validators}/json_schemas/schemas/README.md (100%)
 rename ethology/{io/annotations => validators}/json_schemas/schemas/VIA_schema.json (100%)
 rename ethology/{io/annotations => validators}/json_schemas/utils.py (100%)
 rename ethology/{io/validate.py => validators/utils.py} (100%)
 rename tests/test_unit/{test_io_annotations/test_validators.py => test_validators/test_annotations.py} (58%)
 rename tests/test_unit/{test_io_detections/test_validators.py => test_validators/test_detections.py} (99%)
 create mode 100644 tests/test_unit/test_validators/test_json_schemas.py

diff --git a/ethology/io/annotations/load_bboxes.py b/ethology/io/annotations/load_bboxes.py
index f70de2d4..47f01fe4 100644
--- a/ethology/io/annotations/load_bboxes.py
+++ b/ethology/io/annotations/load_bboxes.py
@@ -10,13 +10,13 @@
 import xarray as xr
 from pandera.typing.pandas import DataFrame
 
-from ethology.io.annotations.validate import (
+from ethology.validators.annotations import (
     ValidBboxAnnotationsDataFrame,
     ValidBboxAnnotationsDataset,
     ValidCOCO,
     ValidVIA,
 )
-from ethology.io.validate import _check_output
+from ethology.validators.utils import _check_output
 
 
 @_check_output(ValidBboxAnnotationsDataset)
diff --git a/ethology/io/annotations/save_bboxes.py b/ethology/io/annotations/save_bboxes.py
index d79d6ed4..a21ecf93 100644
--- a/ethology/io/annotations/save_bboxes.py
+++ b/ethology/io/annotations/save_bboxes.py
@@ -11,12 +11,12 @@
 import xarray as xr
 from pandera.typing.pandas import DataFrame
 
-from ethology.io.annotations.validate import (
+from ethology.validators.annotations import (
     ValidBboxAnnotationsCOCO,
     ValidBboxAnnotationsDataset,
     ValidCOCO,
 )
-from ethology.io.validate import _check_input, _check_output
+from ethology.validators.utils import _check_input, _check_output
 
 
 @_check_input(validator=ValidBboxAnnotationsDataset)
diff --git a/ethology/io/annotations/validate.py b/ethology/validators/annotations.py
similarity index 99%
rename from ethology/io/annotations/validate.py
rename to ethology/validators/annotations.py
index 87229879..84653110 100644
--- a/ethology/io/annotations/validate.py
+++ b/ethology/validators/annotations.py
@@ -8,13 +8,13 @@
 from attrs import define, field
 from pandera.typing import Index
 
-from ethology.io.annotations.json_schemas.utils import (
+from ethology.validators.json_schemas.utils import (
     _check_file_is_json,
     _check_file_matches_schema,
     _check_required_keys_in_dict,
     _get_default_schema,
 )
-from ethology.io.validate import ValidDataset
+from ethology.validators.utils import ValidDataset
 
 
 @define
diff --git a/ethology/io/detections/validate.py b/ethology/validators/detections.py
similarity index 96%
rename from ethology/io/detections/validate.py
rename to ethology/validators/detections.py
index ceff454b..62bb609e 100644
--- a/ethology/io/detections/validate.py
+++ b/ethology/validators/detections.py
@@ -2,7 +2,7 @@
 
 from attrs import define, field
 
-from ethology.io.validate import ValidDataset
+from ethology.validators.utils import ValidDataset
 
 
 @define
diff --git a/ethology/io/annotations/json_schemas/__init__.py b/ethology/validators/json_schemas/__init__.py
similarity index 100%
rename from ethology/io/annotations/json_schemas/__init__.py
rename to ethology/validators/json_schemas/__init__.py
diff --git a/ethology/io/annotations/json_schemas/schemas/COCO_schema.json b/ethology/validators/json_schemas/schemas/COCO_schema.json
similarity index 100%
rename from ethology/io/annotations/json_schemas/schemas/COCO_schema.json
rename to ethology/validators/json_schemas/schemas/COCO_schema.json
diff --git a/ethology/io/annotations/json_schemas/schemas/README.md b/ethology/validators/json_schemas/schemas/README.md
similarity index 100%
rename from ethology/io/annotations/json_schemas/schemas/README.md
rename to ethology/validators/json_schemas/schemas/README.md
diff --git a/ethology/io/annotations/json_schemas/schemas/VIA_schema.json b/ethology/validators/json_schemas/schemas/VIA_schema.json
similarity index 100%
rename from ethology/io/annotations/json_schemas/schemas/VIA_schema.json
rename to ethology/validators/json_schemas/schemas/VIA_schema.json
diff --git a/ethology/io/annotations/json_schemas/utils.py b/ethology/validators/json_schemas/utils.py
similarity index 100%
rename from ethology/io/annotations/json_schemas/utils.py
rename to ethology/validators/json_schemas/utils.py
diff --git a/ethology/io/validate.py b/ethology/validators/utils.py
similarity index 100%
rename from ethology/io/validate.py
rename to ethology/validators/utils.py
diff --git a/tests/fixtures/annotations.py b/tests/fixtures/annotations.py
index fd8efb35..eb3830fa 100644
--- a/tests/fixtures/annotations.py
+++ b/tests/fixtures/annotations.py
@@ -117,22 +117,6 @@ def small_schema() -> dict:
     }
 
 
-@pytest.fixture()
-def default_VIA_schema() -> dict:
-    """Get default VIA schema."""
-    from ethology.io.annotations.json_schemas.utils import _get_default_schema
-
-    return _get_default_schema("VIA")
-
-
-@pytest.fixture()
-def default_COCO_schema() -> dict:
-    """Get default COCO schema."""
-    from ethology.io.annotations.json_schemas.utils import _get_default_schema
-
-    return _get_default_schema("COCO")
-
-
 # ----------------- Bboxes dataset validation fixtures -----------------
 @pytest.fixture
 def valid_bbox_annotations_dataset():
diff --git a/tests/test_unit/test_io_annotations/test_save_bboxes.py b/tests/test_unit/test_io_annotations/test_save_bboxes.py
index 1cab673c..21a0c60f 100644
--- a/tests/test_unit/test_io_annotations/test_save_bboxes.py
+++ b/tests/test_unit/test_io_annotations/test_save_bboxes.py
@@ -16,7 +16,7 @@
     _get_raw_df_from_ds,
     to_COCO_file,
 )
-from ethology.io.annotations.validate import ValidBboxAnnotationsCOCO
+from ethology.validators.annotations import ValidBboxAnnotationsCOCO
 
 
 def read_JSON_as_dict(file_path: str | Path) -> dict:
diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_validators/test_annotations.py
similarity index 58%
rename from tests/test_unit/test_io_annotations/test_validators.py
rename to tests/test_unit/test_validators/test_annotations.py
index 599f942a..fbcc77ee 100644
--- a/tests/test_unit/test_io_annotations/test_validators.py
+++ b/tests/test_unit/test_validators/test_annotations.py
@@ -5,12 +5,7 @@
 import pytest
 import xarray as xr
 
-from ethology.io.annotations.json_schemas.utils import (
-    _check_required_keys_in_dict,
-    _check_required_properties_keys,
-    _extract_properties_keys,
-)
-from ethology.io.annotations.validate import (
+from ethology.validators.annotations import (
     ValidBboxAnnotationsDataset,
     ValidCOCO,
     ValidVIA,
@@ -101,248 +96,6 @@ def test_validators_invalid_input_files(
         assert invalid_json_file.name in str(excinfo.value)
 
 
-@pytest.mark.parametrize(
-    "schema, expected_properties_keys",
-    [
-        ("small_schema", ["a", "b", "b/b1", "c", "c/c1", "c/c2"]),
-        (
-            "default_VIA_schema",
-            [
-                "_via_attributes",
-                "_via_attributes/file",
-                "_via_attributes/region",
-                "_via_attributes/region/default_options",
-                "_via_attributes/region/description",
-                "_via_attributes/region/options",
-                "_via_attributes/region/type",
-                "_via_data_format_version",
-                "_via_image_id_list",
-                "_via_img_metadata",
-                "_via_img_metadata/file_attributes",
-                "_via_img_metadata/filename",
-                "_via_img_metadata/regions",
-                "_via_img_metadata/regions/region_attributes",
-                "_via_img_metadata/regions/shape_attributes",
-                "_via_img_metadata/regions/shape_attributes/height",
-                "_via_img_metadata/regions/shape_attributes/name",
-                "_via_img_metadata/regions/shape_attributes/width",
-                "_via_img_metadata/regions/shape_attributes/x",
-                "_via_img_metadata/regions/shape_attributes/y",
-                "_via_img_metadata/size",
-                "_via_settings",
-                "_via_settings/core",
-                "_via_settings/project",
-                "_via_settings/ui",
-            ],
-        ),
-        (
-            "default_COCO_schema",
-            [
-                "annotations",
-                "annotations/area",
-                "annotations/bbox",
-                "annotations/category_id",
-                "annotations/id",
-                "annotations/image_id",
-                "annotations/iscrowd",
-                "categories",
-                "categories/id",
-                "categories/name",
-                "categories/supercategory",
-                "images",
-                "images/file_name",
-                "images/height",
-                "images/id",
-                "images/width",
-                "info",
-                "licenses",
-            ],
-        ),
-    ],
-)
-def test_extract_properties_keys(
-    schema: dict,
-    expected_properties_keys: list,
-    request: pytest.FixtureRequest,
-):
-    """Test the _extract_properties_keys helper function."""
-    schema = request.getfixturevalue(schema)
-    assert _extract_properties_keys(schema) == sorted(expected_properties_keys)
-
-
-@pytest.mark.parametrize(
-    (
-        "list_required_keys, input_dict, additional_message, "
-        "expected_exception, expected_message"
-    ),
-    [
-        (
-            ["images", "annotations", "categories"],
-            {
-                "images": [1, 2, 3],
-                "annotations": [1, 2, 3],
-                "categories": [1, 2, 3],
-            },
-            "",
-            does_not_raise(),
-            "",
-        ),  # zero missing keys, and all keys map to non-empty values
-        (
-            ["images", "annotations", "categories"],
-            {
-                "images": [],
-                "annotations": [1, 2, 3],
-                "categories": [1, 2, 3],
-            },
-            "",
-            pytest.raises(ValueError),
-            "Empty value(s) found for the required key(s) ['images'].",
-        ),  # zero missing keys, but one ("images") maps to empty values
-        (
-            ["images", "annotations", "categories"],
-            {
-                "images": [],
-                "annotations": {},
-                "categories": [1, 2, 3],
-            },
-            "",
-            pytest.raises(ValueError),
-            (
-                "Empty value(s) found for the required key(s) "
-                "['annotations', 'images']."
-            ),
-        ),  # zero missing keys, but two keys map to empty values
-        (
-            ["images", "annotations", "categories"],
-            {"annotations": "", "categories": ""},
-            "",
-            pytest.raises(ValueError),
-            "Required key(s) ['images'] not found.",
-        ),  # one missing key
-        (
-            ["images", "annotations", "categories"],
-            {"annotations": ""},
-            "",
-            pytest.raises(ValueError),
-            "Required key(s) ['categories', 'images'] not found.",
-        ),  # two missing keys
-        (
-            ["images", "annotations", "categories"],
-            {"annotations": "", "categories": ""},
-            "FOO",
-            pytest.raises(ValueError),
-            "Required key(s) ['images'] not foundFOO.",
-        ),  # one missing key with additional message for missing keys
-    ],
-)
-def test_check_required_keys_in_dict(
-    list_required_keys: list,
-    input_dict: dict,
-    additional_message: str,
-    expected_exception: pytest.raises,
-    expected_message: str,
-):
-    """Test the _check_required_keys_in_dict helper function.
-
-    The check verifies that the required keys are defined in the input
-    dictionary and if they are, it checks that they do not map to empty
-    values.
-    """
-    with expected_exception as excinfo:
-        _check_required_keys_in_dict(
-            list_required_keys, input_dict, additional_message
-        )
-
-    # Check error message
-    if excinfo:
-        assert expected_message in str(excinfo.value)
-
-
-def test_check_required_properties_keys(small_schema: dict):
-    """Test the _check_required_keys helper function."""
-    # Define a sample schema from "small_schema"
-    # with a "properties" key missing (e.g. "c/c2")
-    small_schema["properties"]["c"]["properties"].pop("c2")
-
-    # Define required "properties" keys
-    required_keys = ["a", "b", "c/c2"]
-
-    # Run check
-    with pytest.raises(ValueError) as excinfo:
-        _check_required_properties_keys(required_keys, small_schema)
-
-    # Check error message
-    assert "Required key(s) ['c/c2'] not found in schema" in str(excinfo.value)
-
-
-@pytest.mark.parametrize(
-    "input_file,",
-    [
-        "VIA_JSON_sample_1.json",
-        "VIA_JSON_sample_2.json",
-    ],
-)
-def test_required_keys_in_provided_VIA_schema(
-    input_file: str, default_VIA_schema: dict, annotations_test_data: dict
-):
-    """Check the provided VIA schema contains the ValidVIA required keys."""
-    # Get required keys from a VIA valid file
-    filepath = annotations_test_data[input_file]
-    valid_VIA = ValidVIA(path=filepath)
-    required_VIA_keys = valid_VIA.required_keys
-
-    # Map required keys to "properties" keys in schema
-    map_required_to_properties_keys = {
-        "main": "",
-        "images": "_via_img_metadata",
-        "regions": "_via_img_metadata/regions",
-        "shape_attributes": "_via_img_metadata/regions/shape_attributes",
-    }
-
-    # Express required keys as required "properties" keys
-    required_property_keys = [
-        val if ky == "main" else f"{map_required_to_properties_keys[ky]}/{val}"
-        for ky, values in required_VIA_keys.items()
-        for val in values
-    ]
-
-    # Run check
-    _check_required_properties_keys(
-        required_property_keys,
-        default_VIA_schema,
-    )
-
-
-@pytest.mark.parametrize(
-    "input_file,",
-    [
-        "COCO_JSON_sample_1.json",
-        "COCO_JSON_sample_2.json",
-    ],
-)
-def test_required_keys_in_provided_COCO_schema(
-    input_file: str, default_COCO_schema: dict, annotations_test_data: dict
-):
-    """Check the provided COCO schema contains the ValidCOCO required keys."""
-    # Get required keys from a COCO valid file
-    filepath = annotations_test_data[input_file]
-    valid_COCO = ValidCOCO(path=filepath)
-    required_COCO_keys = valid_COCO.required_keys
-
-    # Prepare list of required "properties" keys with full paths
-    required_properties_keys = [
-        f"{level}/{ky}" if level != "main" else ky
-        for level, required_keys in required_COCO_keys.items()
-        for ky in required_keys
-    ]
-
-    # Run check
-    _check_required_properties_keys(
-        required_properties_keys,
-        default_COCO_schema,
-    )
-
-
 @pytest.mark.parametrize(
     "validator, input_file, expected_exception",
     [
diff --git a/tests/test_unit/test_io_detections/test_validators.py b/tests/test_unit/test_validators/test_detections.py
similarity index 99%
rename from tests/test_unit/test_io_detections/test_validators.py
rename to tests/test_unit/test_validators/test_detections.py
index aa4abf9b..d053d6ef 100644
--- a/tests/test_unit/test_io_detections/test_validators.py
+++ b/tests/test_unit/test_validators/test_detections.py
@@ -4,7 +4,7 @@
 import pytest
 import xarray as xr
 
-from ethology.io.detections.validate import ValidBboxDetectionsDataset
+from ethology.validators.detections import ValidBboxDetectionsDataset
 
 
 @pytest.fixture
diff --git a/tests/test_unit/test_validators/test_json_schemas.py b/tests/test_unit/test_validators/test_json_schemas.py
new file mode 100644
index 00000000..f496043f
--- /dev/null
+++ b/tests/test_unit/test_validators/test_json_schemas.py
@@ -0,0 +1,268 @@
+from contextlib import nullcontext as does_not_raise
+
+import pytest
+
+from ethology.validators.annotations import ValidCOCO, ValidVIA
+from ethology.validators.json_schemas.utils import (
+    _check_required_keys_in_dict,
+    _check_required_properties_keys,
+    _extract_properties_keys,
+)
+
+
+@pytest.fixture()
+def default_VIA_schema() -> dict:
+    """Get default VIA schema."""
+    from ethology.validators.json_schemas.utils import _get_default_schema
+
+    return _get_default_schema("VIA")
+
+
+@pytest.fixture()
+def default_COCO_schema() -> dict:
+    """Get default COCO schema."""
+    from ethology.validators.json_schemas.utils import _get_default_schema
+
+    return _get_default_schema("COCO")
+
+
+@pytest.mark.parametrize(
+    "schema, expected_properties_keys",
+    [
+        ("small_schema", ["a", "b", "b/b1", "c", "c/c1", "c/c2"]),
+        (
+            "default_VIA_schema",
+            [
+                "_via_attributes",
+                "_via_attributes/file",
+                "_via_attributes/region",
+                "_via_attributes/region/default_options",
+                "_via_attributes/region/description",
+                "_via_attributes/region/options",
+                "_via_attributes/region/type",
+                "_via_data_format_version",
+                "_via_image_id_list",
+                "_via_img_metadata",
+                "_via_img_metadata/file_attributes",
+                "_via_img_metadata/filename",
+                "_via_img_metadata/regions",
+                "_via_img_metadata/regions/region_attributes",
+                "_via_img_metadata/regions/shape_attributes",
+                "_via_img_metadata/regions/shape_attributes/height",
+                "_via_img_metadata/regions/shape_attributes/name",
+                "_via_img_metadata/regions/shape_attributes/width",
+                "_via_img_metadata/regions/shape_attributes/x",
+                "_via_img_metadata/regions/shape_attributes/y",
+                "_via_img_metadata/size",
+                "_via_settings",
+                "_via_settings/core",
+                "_via_settings/project",
+                "_via_settings/ui",
+            ],
+        ),
+        (
+            "default_COCO_schema",
+            [
+                "annotations",
+                "annotations/area",
+                "annotations/bbox",
+                "annotations/category_id",
+                "annotations/id",
+                "annotations/image_id",
+                "annotations/iscrowd",
+                "categories",
+                "categories/id",
+                "categories/name",
+                "categories/supercategory",
+                "images",
+                "images/file_name",
+                "images/height",
+                "images/id",
+                "images/width",
+                "info",
+                "licenses",
+            ],
+        ),
+    ],
+)
+def test_extract_properties_keys(
+    schema: dict,
+    expected_properties_keys: list,
+    request: pytest.FixtureRequest,
+):
+    """Test the _extract_properties_keys helper function."""
+    schema = request.getfixturevalue(schema)
+    assert _extract_properties_keys(schema) == sorted(expected_properties_keys)
+
+
+@pytest.mark.parametrize(
+    (
+        "list_required_keys, input_dict, additional_message, "
+        "expected_exception, expected_message"
+    ),
+    [
+        (
+            ["images", "annotations", "categories"],
+            {
+                "images": [1, 2, 3],
+                "annotations": [1, 2, 3],
+                "categories": [1, 2, 3],
+            },
+            "",
+            does_not_raise(),
+            "",
+        ),  # zero missing keys, and all keys map to non-empty values
+        (
+            ["images", "annotations", "categories"],
+            {
+                "images": [],
+                "annotations": [1, 2, 3],
+                "categories": [1, 2, 3],
+            },
+            "",
+            pytest.raises(ValueError),
+            "Empty value(s) found for the required key(s) ['images'].",
+        ),  # zero missing keys, but one ("images") maps to empty values
+        (
+            ["images", "annotations", "categories"],
+            {
+                "images": [],
+                "annotations": {},
+                "categories": [1, 2, 3],
+            },
+            "",
+            pytest.raises(ValueError),
+            (
+                "Empty value(s) found for the required key(s) "
+                "['annotations', 'images']."
+            ),
+        ),  # zero missing keys, but two keys map to empty values
+        (
+            ["images", "annotations", "categories"],
+            {"annotations": "", "categories": ""},
+            "",
+            pytest.raises(ValueError),
+            "Required key(s) ['images'] not found.",
+        ),  # one missing key
+        (
+            ["images", "annotations", "categories"],
+            {"annotations": ""},
+            "",
+            pytest.raises(ValueError),
+            "Required key(s) ['categories', 'images'] not found.",
+        ),  # two missing keys
+        (
+            ["images", "annotations", "categories"],
+            {"annotations": "", "categories": ""},
+            "FOO",
+            pytest.raises(ValueError),
+            "Required key(s) ['images'] not foundFOO.",
+        ),  # one missing key with additional message for missing keys
+    ],
+)
+def test_check_required_keys_in_dict(
+    list_required_keys: list,
+    input_dict: dict,
+    additional_message: str,
+    expected_exception: pytest.raises,
+    expected_message: str,
+):
+    """Test the _check_required_keys_in_dict helper function.
+
+    The check verifies that the required keys are defined in the input
+    dictionary and if they are, it checks that they do not map to empty
+    values.
+    """
+    with expected_exception as excinfo:
+        _check_required_keys_in_dict(
+            list_required_keys, input_dict, additional_message
+        )
+
+    # Check error message
+    if excinfo:
+        assert expected_message in str(excinfo.value)
+
+
+def test_check_required_properties_keys(small_schema: dict):
+    """Test the _check_required_properties_keys helper function."""
+    # Define a sample schema from "small_schema"
+    # with a "properties" key missing (e.g. "c/c2")
+    small_schema["properties"]["c"]["properties"].pop("c2")
+
+    # Define required "properties" keys
+    required_keys = ["a", "b", "c/c2"]
+
+    # Run check
+    with pytest.raises(ValueError) as excinfo:
+        _check_required_properties_keys(required_keys, small_schema)
+
+    # Check error message
+    assert "Required key(s) ['c/c2'] not found in schema" in str(excinfo.value)
+
+
+@pytest.mark.parametrize(
+    "input_file,",
+    [
+        "VIA_JSON_sample_1.json",
+        "VIA_JSON_sample_2.json",
+    ],
+)
+def test_required_keys_in_provided_VIA_schema(
+    input_file: str, default_VIA_schema: dict, annotations_test_data: dict
+):
+    """Check the provided VIA schema contains the ValidVIA required keys."""
+    # Get required keys from a VIA valid file
+    filepath = annotations_test_data[input_file]
+    valid_VIA = ValidVIA(path=filepath)
+    required_VIA_keys = valid_VIA.required_keys
+
+    # Map required keys to "properties" keys in schema
+    map_required_to_properties_keys = {
+        "main": "",
+        "images": "_via_img_metadata",
+        "regions": "_via_img_metadata/regions",
+        "shape_attributes": "_via_img_metadata/regions/shape_attributes",
+    }
+
+    # Express required keys as required "properties" keys
+    required_property_keys = [
+        val if ky == "main" else f"{map_required_to_properties_keys[ky]}/{val}"
+        for ky, values in required_VIA_keys.items()
+        for val in values
+    ]
+
+    # Run check
+    _check_required_properties_keys(
+        required_property_keys,
+        default_VIA_schema,
+    )
+
+
+@pytest.mark.parametrize(
+    "input_file,",
+    [
+        "COCO_JSON_sample_1.json",
+        "COCO_JSON_sample_2.json",
+    ],
+)
+def test_required_keys_in_provided_COCO_schema(
+    input_file: str, default_COCO_schema: dict, annotations_test_data: dict
+):
+    """Check the provided COCO schema contains the ValidCOCO required keys."""
+    # Get required keys from a COCO valid file
+    filepath = annotations_test_data[input_file]
+    valid_COCO = ValidCOCO(path=filepath)
+    required_COCO_keys = valid_COCO.required_keys
+
+    # Prepare list of required "properties" keys with full paths
+    required_properties_keys = [
+        f"{level}/{ky}" if level != "main" else ky
+        for level, required_keys in required_COCO_keys.items()
+        for ky in required_keys
+    ]
+
+    # Run check
+    _check_required_properties_keys(
+        required_properties_keys,
+        default_COCO_schema,
+    )

From f3774ea23d9fe45a3e5ddc801a12ad9d061463da Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 17:58:27 +0000
Subject: [PATCH 09/13] Fix manifest

---
 MANIFEST.in | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index 6d0b1021..b4a7fbe2 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -12,5 +12,5 @@ recursive-exclude examples *
 recursive-include docs *
 
 # Include json schemas
-recursive-include ethology/io/annotations/json_schemas/schemas *.json
-recursive-include ethology/io/annotations/json_schemas/schemas *.md
+recursive-include ethology/validatorss/json_schemas/schemas *.json
+recursive-include ethology/validators/json_schemas/schemas *.md

From ae1dff364861c45eb1267542dfe20ec05e67e9f3 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 18:02:25 +0000
Subject: [PATCH 10/13] Fix typo in manifest

---
 MANIFEST.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index b4a7fbe2..c6d258bd 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -12,5 +12,5 @@ recursive-exclude examples *
 recursive-include docs *
 
 # Include json schemas
-recursive-include ethology/validatorss/json_schemas/schemas *.json
+recursive-include ethology/validators/json_schemas/schemas *.json
 recursive-include ethology/validators/json_schemas/schemas *.md

From 2e6ef3af8c455d6646cc5caab0c2b8cdce193f52 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 19:30:37 +0000
Subject: [PATCH 11/13] Remove docs warnings

---
 docs/source/_templates/autosummary/class.rst | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/docs/source/_templates/autosummary/class.rst b/docs/source/_templates/autosummary/class.rst
index 07889c22..4c075c91 100644
--- a/docs/source/_templates/autosummary/class.rst
+++ b/docs/source/_templates/autosummary/class.rst
@@ -3,11 +3,9 @@
 .. currentmodule:: {{ module }}
 
 .. autoclass:: {{ objname }}
-   :members:
-   :show-inheritance:
-   :inherited-members:
-   :exclude-members: Config
-
+   {% if objname != 'ValidDataset' %}:members:{% endif %}
+   {% if objname != 'ValidDataset' %}:inherited-members:{% endif %}
+   {% if objname == 'ValidBboxAnnotationsDataFrame' %}:exclude-members: Config{% endif %}
 
    {% block methods %}
    {% set ns = namespace(has_public_methods=false) %}

From 47976f4f5631ef442e3e352a2204395c8da5dbeb Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 19:54:37 +0000
Subject: [PATCH 12/13] Update to autodoc defaults new syntax and simplify

---
 docs/source/conf.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index a06e7427..6db7d86d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -70,7 +70,7 @@
 # Automatically generate stub pages for API
 autosummary_generate = True
 autosummary_generate_overwrite = False
-autodoc_default_flags = ["members", "inherited-members"]
+autodoc_default_options = {"show-inheritance": True}  # applies to all classes
 
 # Prefix section labels with the document name
 autosectionlabel_prefix_document = True
@@ -182,6 +182,10 @@
     "pandera": ("https://pandera.readthedocs.io/en/stable/", None),
     "movement": ("https://movement.neuroinformatics.dev/latest/", None),
     "sklearn": ("https://scikit-learn.org/stable/", None),
+    "jsonschema": (
+        "https://python-jsonschema.readthedocs.io/en/stable/",
+        None,
+    ),
 }
 
 

From 10fc95ba45105e2d965cf9dd26a87793446e1795 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 20 Nov 2025 20:07:09 +0000
Subject: [PATCH 13/13] Review docstrings

---
 ethology/validators/annotations.py | 24 +++++++++++++++++-------
 ethology/validators/detections.py  | 24 ++++++++++++++++++------
 ethology/validators/utils.py       | 26 ++++++++++++++++----------
 3 files changed, 51 insertions(+), 23 deletions(-)

diff --git a/ethology/validators/annotations.py b/ethology/validators/annotations.py
index 84653110..0ecb886c 100644
--- a/ethology/validators/annotations.py
+++ b/ethology/validators/annotations.py
@@ -228,26 +228,36 @@ def _file_contains_unique_image_IDs(self, attribute, value):
 class ValidBboxAnnotationsDataset(ValidDataset):
     """Class for valid ``ethology`` bounding box annotations datasets.
 
-    It checks that the input dataset has:
+    This class validates that the input dataset:
+
+    - is an xarray Dataset,
+    - has ``image_id``, ``space``, ``id`` as dimensions,
+    - has ``position`` and ``shape`` as data variables,
+    - both data variables span at least the dimensions ``image_id``,
+      ``space`` and ``id``.
 
-    - ``image_id``, ``space``, ``id`` as dimensions
-    - ``position`` and ``shape`` as data variables
 
     Attributes
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
-    required_dims : set
-        Set of required dimension names.
+    required_dims : set[str]
+        The set of required dimension names: ``image_id``, ``space`` and
+        ``id``.
     required_data_vars : dict[str, set]
-        A dictionary mapping data variable names to their required dimensions.
+        A dictionary mapping data variable names to their required minimum
+        dimensions:
+
+        - ``position`` maps to ``image_id``, ``space`` and ``id``,
+        - ``shape`` maps to ``image_id``, ``space`` and ``id``.
 
     Raises
     ------
     TypeError
         If the input is not an xarray Dataset.
     ValueError
-        If the dataset is missing required data variables or dimensions.
+        If the dataset is missing required data variables or dimensions,
+        or if any required dimensions are missing for any data variable.
 
     Notes
     -----
diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py
index 62bb609e..1f6d9df6 100644
--- a/ethology/validators/detections.py
+++ b/ethology/validators/detections.py
@@ -9,26 +9,38 @@
 class ValidBboxDetectionsDataset(ValidDataset):
     """Class for valid ``ethology`` bounding box detections datasets.
 
-    It checks that the input dataset has:
+    This class validates that the input dataset:
+
+    - is an xarray Dataset,
+    - has ``image_id``, ``space``, ``id`` as dimensions,
+    - has ``position``, ``shape`` and ``confidence`` as data variables,
+    - ``position`` and ``shape`` span at least the dimensions ``image_id``,
+      ``space`` and ``id``,
+    - ``confidence`` spans at least the dimensions ``image_id`` and ``id``.
 
-    - ``image_id``, ``space``, ``id`` as dimensions
-    - ``position``, ``shape`` and ``confidence`` as data variables
 
     Attributes
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
     required_dims : set
-        Set of required dimension names.
+        The set of required dimension names: ``image_id``, ``space`` and
+        ``id``.
     required_data_vars : dict[str, set]
-        A dictionary mapping data variable names to their required dimensions.
+        A dictionary mapping data variable names to their required minimum
+        dimensions:
+
+        - ``position`` maps to ``image_id``, ``space`` and ``id``,
+        - ``shape`` maps to ``image_id``, ``space`` and ``id``,
+        - ``confidence`` maps to ``image_id`` and ``id``.
 
     Raises
     ------
     TypeError
         If the input is not an xarray Dataset.
     ValueError
-        If the dataset is missing required data variables or dimensions.
+        If the dataset is missing required data variables or dimensions,
+        or if any required dimensions are missing for any data variable.
 
     Notes
     -----
diff --git a/ethology/validators/utils.py b/ethology/validators/utils.py
index 4739f32d..ce74a289 100644
--- a/ethology/validators/utils.py
+++ b/ethology/validators/utils.py
@@ -12,10 +12,12 @@
 class ValidDataset(ABC):
     """An abstract base class for valid ``ethology`` datasets.
 
-    It checks that the input dataset has:
+    This class validates that the input dataset:
 
-    - required dimensions
-    - required data variables
+    - is an xarray Dataset
+    - contains all required dimensions
+    - contains all required data variables
+    - has at least the required dimensions for each data variable
 
     Subclasses must define ``required_dims`` and ``required_data_vars``
     attributes.
@@ -24,17 +26,21 @@ class ValidDataset(ABC):
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
-    required_dims : set
-        Set of required dimension names (defined by subclasses).
-    required_data_vars : set
-        Set of required data variable names (defined by subclasses).
+    required_dims : set[str]
+        A set of required dimension names. This attribute should be
+        defined by any subclass inheriting from this class.
+    required_data_vars : dict[str, set]
+        A dictionary mapping data variable names to their required dimensions.
+        This attribute should be defined by any subclass inheriting from
+        this class.
 
     Raises
     ------
     TypeError
         If the input is not an xarray Dataset.
     ValueError
-        If the dataset is missing required data variables or dimensions.
+        If the dataset is missing required data variables or dimensions,
+        or if any required dimensions are missing for any data variable.
 
     Notes
     -----
@@ -49,13 +55,13 @@ class ValidDataset(ABC):
     @property
     @abstractmethod
     def required_dims(self) -> set:
-        """Subclasses must provide a required_dims property."""
+        """Subclasses must provide a ``required_dims`` property."""
         pass  # pragma: no cover
 
     @property
     @abstractmethod
     def required_data_vars(self) -> dict[str, set]:
-        """Subclasses must provide a required_data_vars property."""
+        """Subclasses must provide a ``required_data_vars`` property."""
         pass  # pragma: no cover
 
     # Validators