From d36c727fc05b0afb2b28f03d4e627f6460591ca3 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 13 Nov 2025 20:58:25 +0000
Subject: [PATCH 01/39] Example and lightning model

---
 ethology/detectors/ensembles/__init__.py |   0
 ethology/detectors/ensembles/fusion.py   |  40 ++++++
 ethology/detectors/ensembles/models.py   | 164 +++++++++++++++++++++
 ethology/detectors/ensembles/utils.py    | 107 ++++++++++++++
 examples/ensemble_of_detectors.py        | 172 +++++++++++++++++++++++
 pyproject.toml                           |   8 +-
 6 files changed, 490 insertions(+), 1 deletion(-)
 create mode 100644 ethology/detectors/ensembles/__init__.py
 create mode 100644 ethology/detectors/ensembles/fusion.py
 create mode 100644 ethology/detectors/ensembles/models.py
 create mode 100644 ethology/detectors/ensembles/utils.py
 create mode 100644 examples/ensemble_of_detectors.py

diff --git a/ethology/detectors/ensembles/__init__.py b/ethology/detectors/ensembles/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
new file mode 100644
index 00000000..58521a69
--- /dev/null
+++ b/ethology/detectors/ensembles/fusion.py
@@ -0,0 +1,40 @@
+"""Wrappers around ensemble-boxes fusion functions."""
+import numpy as np
+from ensemble_boxes import weighted_boxes_fusion
+
+
+def weighted_boxes_fusion_in_pixels(
+    image_height_width: tuple[int, int],
+    boxes_list: list[np.ndarray],
+    scores_list: list[np.ndarray],
+    labels_list: list[np.ndarray],
+    iou_thr: float,
+    skip_box_thr: float,
+):
+    """Run weighted boxes fusion on one image, working in pixel units.
+
+    Boxes are x1y1x2y2 in pixels, one array per model. They are scaled
+    to fractions of the image size for the fusion call and scaled back
+    afterwards. Returns (fused_boxes, fused_scores, fused_labels).
+    """
+    height, width = image_height_width
+    xyxy_scale = np.array([width, height, width, height])
+
+    # Empty arrays are passed through untouched
+    normalised_boxes = []
+    for model_boxes in boxes_list:
+        if len(model_boxes) > 0:
+            model_boxes = model_boxes / xyxy_scale
+        normalised_boxes.append(model_boxes)
+
+    fused = weighted_boxes_fusion(
+        normalised_boxes,
+        scores_list,
+        labels_list,
+        iou_thr=iou_thr,
+        skip_box_thr=skip_box_thr,
+    )
+    fused_boxes, fused_scores, fused_labels = fused
+
+    # Back to pixels (x1y1x2y2, the same layout fasterrcnn outputs)
+    return fused_boxes * xyxy_scale, fused_scores, fused_labels
\ No newline at end of file
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
new file mode 100644
index 00000000..a42ce1ef
--- /dev/null
+++ b/ethology/detectors/ensembles/models.py
@@ -0,0 +1,164 @@
+"""Lightning Modules for ensembles of detectors."""
+
+from itertools import chain
+from pathlib import Path
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torchvision.models.detection as detection_models
+import xarray as xr
+import yaml
+from joblib import Parallel, delayed
+from lightning import LightningModule
+
+from ethology.detectors.ensembles.fusion import weighted_boxes_fusion_in_pixels
+from ethology.detectors.ensembles.utils import (
+    arrays_to_ds_variables,
+    pad_to_max_first_dimension,
+)
+
+
+class EnsembleDetector(LightningModule):
+    """Ensemble of (trained) detectors for inference.
+
+    Attributes
+    ----------
+    config_file: str
+        Path to the YAML config file.
+    """
+
+    def __init__(self, config_file: str | Path):
+        super().__init__()
+
+        # Load config
+        self.config_file = Path(config_file)
+        with open(self.config_file) as f:
+            self.config = yaml.safe_load(f)
+
+        # Load list of models (nn.ModuleList)
+        self.list_models = self.load_models()
+
+    def load_models(self) -> nn.ModuleList:
+        """Load models from checkpoints."""
+        models_config = self.config["models"]
+        model_class = getattr(detection_models, models_config["model_class"])
+
+        list_models = []
+        for checkpoint_path in models_config["checkpoints"]:
+            # Get model architecture and weights
+            model = model_class(**models_config["model_kwargs"])
+            checkpoint = torch.load(checkpoint_path, map_location=self.device)
+            state_dict = checkpoint["state_dict"]
+
+            # Load state dict into model
+            # PyTorch Lightning saves the model with a "model."
+            # prefix in the state_dict keys if you defined self.model
+            # in your LightningModule - we remove the prefix here.
+            if any(key.startswith("model.") for key in state_dict):
+                model_state_dict = {
+                    key.replace("model.", "", 1): value
+                    for key, value in state_dict.items()
+                    if key.startswith("model.")
+                }
+            else:
+                model_state_dict = state_dict
+            model.load_state_dict(model_state_dict)
+
+            # Append to list
+            list_models.append(model)
+        return nn.ModuleList(list_models)
+
+    def fuse_bboxes(self, images_batch, predictions_per_model: list[dict]):
+        """Fuse bboxes per sample in CPU in parallel."""
+        # Fuse bboxes per sample in CPU in parallel
+        # Dispatch fusion tasks to executor (non-blocking)
+        # if self.config["fusion"]["method"] == "wbf"
+
+        # n_jobs = -1 means Use ALL available CPU cores
+        # n_jobs = -2 means Use ALL available CPU cores except one
+        n_jobs = self.config["fusion"].get("n_jobs", -1)
+
+        # Parallel WBF fusion
+        batch_size = len(images_batch)
+        results_batch = Parallel(n_jobs=n_jobs)(
+            delayed(weighted_boxes_fusion_in_pixels)(
+                images_batch[i].shape[-2:],  # image height and width
+                [
+                    preds[i]["boxes"].cpu().numpy()
+                    for preds in predictions_per_model
+                ],  # same image across all models
+                [
+                    preds[i]["scores"].cpu().numpy()
+                    for preds in predictions_per_model
+                ],
+                [
+                    preds[i]["labels"].cpu().numpy()
+                    for preds in predictions_per_model
+                ],
+                self.config["fusion"]["iou_th_ensemble"],
+                self.config["fusion"]["skip_box_th"],
+            )
+            for i in range(batch_size)
+        )  # list [(bboxes, scores, labels) * batch_size]
+
+        fused_boxes_batch, fused_scores_batch, fused_labels_batch = (
+            zip(*results_batch, strict=True) if results_batch else ([], [], [])
+        )
+
+        return fused_boxes_batch, fused_scores_batch, fused_labels_batch
+
+    def predict_step(self, batch, batch_idx):
+        """Predict step for a single batch."""
+        # ------------------------------
+        # Run all models in ensemble in GPU
+        # TODO: can I vectorize this?
+        # https://docs.pytorch.org/tutorials/intermediate/ensembling.html
+        images_batch, _annotations_batch = batch
+        predictions_per_model = [
+            model(images_batch) for model in self.list_models
+        ]  # [num_models][batch_size]
+
+        # ------------------------------
+        # Fuse bboxes per sample in CPU in parallel
+        fused_boxes_batch, fused_scores_batch, fused_labels_batch = (
+            self.fuse_bboxes(images_batch, predictions_per_model)
+        )
+
+        return fused_boxes_batch, fused_scores_batch, fused_labels_batch
+
+    @staticmethod
+    def format_predictions(raw_predictions):
+        """Format as ethology detections dataset."""
+        # Unzip data per batch
+        (
+            fused_boxes_per_batch,
+            fused_scores_per_batch,
+            fused_labels_per_batch,
+        ) = zip(*raw_predictions, strict=True)  # [n_batches][batch_size]
+
+        # Flatten across all batches
+        fused_boxes = list(chain.from_iterable(fused_boxes_per_batch))
+        fused_scores = list(chain.from_iterable(fused_scores_per_batch))
+        fused_labels = list(chain.from_iterable(fused_labels_per_batch))
+
+        # Pad arrays to max n of detections per image
+        fused_boxes_padded = pad_to_max_first_dimension(fused_boxes)
+        fused_scores_padded = pad_to_max_first_dimension(fused_scores)
+        fused_labels_padded = pad_to_max_first_dimension(fused_labels)
+
+        # Stack into arrays
+        bboxes_array = np.transpose(
+            np.stack(fused_boxes_padded), (0, -1, 1)
+        )  # image_id, space-4, id
+        scores_array = np.stack(fused_scores_padded)
+        labels_array = np.stack(fused_labels_padded)
+
+        # ------------------------------
+        # Return as ethology detections dataset
+        ds_variables = arrays_to_ds_variables(
+            bboxes_array, scores_array, labels_array
+        )
+        detections_ds = xr.Dataset(data_vars=ds_variables)
+
+        return detections_ds
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
new file mode 100644
index 00000000..f39956ed
--- /dev/null
+++ b/ethology/detectors/ensembles/utils.py
@@ -0,0 +1,107 @@
+"""Utility functions for reshaping outputs of ensembles of detectors."""
+import numpy as np
+import xarray as xr
+
+
+def get_padding_width(array, max_n):
+    """Return np.pad widths that grow only axis 0 of array to max_n rows."""
+    n_missing_rows = max_n - array.shape[0]  # appended after existing rows
+    trailing_axes = [(0, 0) for _ in range(array.ndim - 1)]
+    return [(0, n_missing_rows), *trailing_axes]
+
+
+def pad_to_max_first_dimension(list_arrays):
+    """NaN-pad every array along axis 0 up to the longest array's length.
+
+    Returns a new list; raises ValueError if ``list_arrays`` is empty.
+    """
+    target_len = max(arr.shape[0] for arr in list_arrays)
+
+    def _pad(arr):
+        widths = get_padding_width(arr, target_len)
+        # rows are appended after the existing ones and filled with NaN
+        return np.pad(arr, widths, mode="constant", constant_values=np.nan)
+
+    return [_pad(arr) for arr in list_arrays]
+
+
+def arrays_to_ds_variables(
+    bboxes_x1y1x2y2_array: np.ndarray,
+    scores_array: np.ndarray,
+    labels_array: np.ndarray,
+    id_array: np.ndarray | None = None,
+) -> dict[str, xr.DataArray]:
+    """Convert arrays to dictionary of dataset variables.
+
+    Parameters
+    ----------
+    bboxes_x1y1x2y2_array: np.ndarray
+        Array of bounding box coordinates with shape
+        [Nimages, 4, Nmax_detections], in format x1y1x2y2 in units of pixels.
+        Nmax_detections is the maximum number of detections per image.
+    scores_array: np.ndarray
+        Array of shape [Nimages, Nmax_detections]
+    labels_array: np.ndarray
+        Array of shape [Nimages, Nmax_detections]
+    id_array: np.ndarray | None, optional
+        Array of shape [Nmax_detections]. If None, will be set to
+        range(Nmax_detections).
+
+    Returns
+    -------
+    dict[str, xr.DataArray]
+        Data arrays keyed by variable name:
+        - "position": bbox centroids, with dimensions (image_id, space, id)
+        - "shape": bbox width and height, with dimensions
+          (image_id, space, id)
+        - "confidence": the scores, with dimensions (image_id, id)
+        - "label": the labels, with dimensions (image_id, id)
+        The image_id coordinate is range(Nimages).
+
+    """
+    n_images = bboxes_x1y1x2y2_array.shape[0]
+    if id_array is None:
+        n_max_detections = bboxes_x1y1x2y2_array.shape[-1]
+        id_array = np.arange(n_max_detections)
+
+    # Split the x1y1x2y2 coordinates into the two opposite corners
+    xy_min = bboxes_x1y1x2y2_array[:, 0:2, :]
+    xy_max = bboxes_x1y1x2y2_array[:, 2:4, :]
+
+    def _spatial_da(data):
+        """Wrap data as a data array along (image_id, space, id)."""
+        return xr.DataArray(
+            data=data,
+            dims=["image_id", "space", "id"],
+            coords={
+                "image_id": np.arange(n_images),
+                "space": ["x", "y"],
+                "id": id_array,
+            },
+        )
+
+    def _per_detection_da(data):
+        """Wrap data as a data array along (image_id, id)."""
+        return xr.DataArray(
+            data=data,
+            dims=["image_id", "id"],
+            coords={
+                "image_id": np.arange(n_images),
+                "id": id_array,
+            },
+        )
+
+    # centroid (x, y) and shape (width, height) of each bbox
+    centroid_da = _spatial_da(0.5 * (xy_min + xy_max))
+    shape_da = _spatial_da(xy_max - xy_min)
+
+    # confidence and label of each bbox
+    confidence_da = _per_detection_da(scores_array)
+    label_da = _per_detection_da(labels_array)
+
+    return {
+        "position": centroid_da,
+        "shape": shape_da,
+        "confidence": confidence_da,
+        "label": label_da,
+    }
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
new file mode 100644
index 00000000..564b8ca5
--- /dev/null
+++ b/examples/ensemble_of_detectors.py
@@ -0,0 +1,172 @@
+# %%
+# imports
+
+from pathlib import Path
+
+import torch
+import torchvision.transforms.v2 as transforms
+import yaml
+from lightning import Trainer
+from torch.utils.data import DataLoader
+from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
+
+from ethology.detectors.ensembles.models import EnsembleDetector
+
+# from ethology.detectors.evaluate import compute_precision_recall_ds
+# from ethology.io.annotations import load_bboxes
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+# Helper functions
+def create_coco_dataset(
+    images_dir: str | Path,
+    annotations_file: str | Path,
+    composed_transform: transforms.Compose,
+) -> CocoDetection:
+    """Create a COCO dataset for object detection.
+
+    Note: transforms are applied to the full dataset. If the dataset
+    is later split, all splits will have the same transforms.
+    """
+    dataset_coco = CocoDetection(
+        root=images_dir,
+        annFile=annotations_file,
+        transforms=composed_transform,
+    )
+
+    # wrap dataset for transforms v2
+    dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco)
+
+    return dataset_transformed
+
+
+def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
+    """Collate function for dataloader with varying number of bounding boxes.
+
+    A custom function is needed for detection
+    because the number of bounding boxes varies
+    between images of the same batch.
+    See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop
+
+    Parameters
+    ----------
+    batch : tuple
+        a tuple of 2 tuples, the first one holding all images in the batch,
+        and the second one holding the corresponding annotations.
+
+    Returns
+    -------
+    tuple
+        a tuple of length = batch size, made up of (image, annotations)
+        tuples.
+
+    """
+    return tuple(zip(*batch, strict=True))
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Input data
+
+dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full")
+annotations_dir = dataset_dir / "annotations"
+annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json"
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Define a dataloader
+# Define transforms for inference
+inference_transforms = transforms.Compose(
+    [
+        transforms.ToImage(),
+        transforms.ToDtype(torch.float32, scale=True),
+    ]
+)
+
+# Create COCO dataset
+# TODO: convert from ethology detections dataset to COCO dataset
+dataset_coco = create_coco_dataset(
+    images_dir=Path(dataset_dir) / "frames",
+    annotations_file=annotations_file_path,
+    composed_transform=inference_transforms,
+)
+
+# dataloader
+dataloader = DataLoader(
+    dataset_coco,
+    batch_size=12,
+    shuffle=False,
+    num_workers=4,
+    collate_fn=collate_fn_varying_n_bboxes,
+    persistent_workers=True,
+    # multiprocessing_context="fork"
+    # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available()
+    # else None,  # see https://github.com/pytorch/pytorch/issues/87688
+)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Define a YAML config file for the ensemble of trained detectors
+
+config = {
+    "models": {
+        "model_class": "fasterrcnn_resnet50_fpn_v2",
+        # imported from torchvision.models.detection
+        "model_kwargs": {
+            "num_classes": 2,
+            "weights": None,  # null in YAML becomes None in Python
+            "weights_backbone": None,
+        },
+        "checkpoints": [
+            "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt",
+            "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt",
+        ],
+    },
+    "fusion": {
+        "method": "wbf",
+        "iou_th_ensemble": 0.5,
+        "skip_box_th": 0.0001,
+        "n_jobs": 2,  # workers for joblib.Parallel, n_workers should be <= number of CPU cores
+        # "confidence_threshold_post_fusion": 0.0,
+        # "max_n_detections": 300
+    },
+}
+config_file = "ensemble_of_detectors.yaml"
+with open(config_file, "w") as f:
+    yaml.dump(config, f, sort_keys=False)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Load the ensemble of detectors
+ensemble_detector = EnsembleDetector(config_file)
+print(f"Ensemble detector is on device: {ensemble_detector.device}")
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Run the ensemble of detectors on a dataset
+# Use Trainer for inference (this sets the device flexibly)
+trainer = Trainer(accelerator="gpu", devices=1, logger=False)
+raw_predictions = trainer.predict(ensemble_detector, dataloader)
+
+# format predictions as ethology detections dataset
+fused_detections_ds = ensemble_detector.format_predictions(raw_predictions)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# # Evaluate the ensemble model
+# # - load ground truth
+# # - compute metrics
+
+# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
+
+
+# fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds(
+#     pred_bboxes_ds=fused_detections_ds,
+#     gt_bboxes_ds=gt_bboxes_ds,
+#     iou_threshold=0.1,  # change to 0.5?
+# )
+
+
+# print(
+#     "Ensemble model with confidence threshold post fusion: "
+#     f"{ensemble_detector.config['fusion']['confidence_threshold_post_fusion']}"
+# )
+# print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}")
+# print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}")
diff --git a/pyproject.toml b/pyproject.toml
index a2d5b2e9..ad9b345e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -19,13 +19,19 @@ classifiers = [
   "License :: OSI Approved :: BSD License",
 ]
 dependencies = [
-  "movement",
+  "xarray",
+  "pooch",
+  "pyyaml",
   "pandera[pandas]",
   "pycocotools",
+  "movement",
   "scikit-learn",
   "torch",
   "torchvision",
+  "ensemble-boxes",
+  "lightning",
   "loguru",
+  "joblib",
 ]
 
 [project.urls]

From 77fc4fe380b0e7fad499ae52a6613df332924be7 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 13 Nov 2025 21:27:43 +0000
Subject: [PATCH 02/39] Add evaluation

---
 ethology/detectors/__init__.py    |   0
 ethology/detectors/evaluate.py    | 245 ++++++++++++++++++++++++++++++
 examples/ensemble_of_detectors.py |  98 +++++++++---
 3 files changed, 322 insertions(+), 21 deletions(-)
 create mode 100644 ethology/detectors/__init__.py
 create mode 100644 ethology/detectors/evaluate.py

diff --git a/ethology/detectors/__init__.py b/ethology/detectors/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/ethology/detectors/evaluate.py b/ethology/detectors/evaluate.py
new file mode 100644
index 00000000..f991420c
--- /dev/null
+++ b/ethology/detectors/evaluate.py
@@ -0,0 +1,245 @@
+"""Utilities for evaluating detectors."""
+
+import numpy as np
+import torch
+import torchvision.ops as ops
+import xarray as xr
+from scipy.optimize import linear_sum_assignment
+
+
+def evaluate_detections_hungarian_ds(
+    pred_bboxes_ds: xr.Dataset,
+    gt_bboxes_ds: xr.Dataset,
+    iou_threshold: float,
+) -> tuple[xr.Dataset, xr.Dataset]:
+    """Compute true positives, false positives, and missed detections.
+
+    Predictions and ground truth are matched per image with the Hungarian
+    algorithm.
+
+    Parameters
+    ----------
+    pred_bboxes_ds : xr.Dataset
+        Predicted bboxes, holding either ``xy_min`` and ``xy_max`` or the
+        ``position`` and ``shape`` variables they are derived from.
+    gt_bboxes_ds : xr.Dataset
+        Ground truth bboxes, with the same layout.
+    iou_threshold : float
+        IoU above which an assigned pair counts as a true positive.
+
+    Returns
+    -------
+    tuple[xr.Dataset, xr.Dataset]
+        The input datasets, modified in place: ``tp``, ``fp`` and ``iou_tp``
+        are added to the predictions and ``md`` to the ground truth.
+
+    """
+    # Derive the corners if either of them is missing: requiring both to be
+    # missing would skip a dataset that holds only one, and fail below
+    corner_vars = ("xy_min", "xy_max")
+    if any(var not in pred_bboxes_ds.variables for var in corner_vars):
+        pred_bboxes_ds = _add_bboxes_min_max_corners(pred_bboxes_ds)
+    if any(var not in gt_bboxes_ds.variables for var in corner_vars):
+        gt_bboxes_ds = _add_bboxes_min_max_corners(gt_bboxes_ds)
+
+    # Prepare input for hungarian: x1y1x2y2 along the "space" dimension
+    pred_bboxes_x1y1_x2y2 = xr.concat(
+        [pred_bboxes_ds.xy_min, pred_bboxes_ds.xy_max], dim="space"
+    ).transpose("image_id", "id", "space")
+    # the ground truth gets its own "id_gt" dimension, so that it is a
+    # separate core dimension from the predictions' "id" in apply_ufunc
+    gt_bboxes_x1y1_x2y2 = (
+        xr.concat([gt_bboxes_ds.xy_min, gt_bboxes_ds.xy_max], dim="space")
+        .transpose("image_id", "id", "space")
+        .rename({"id": "id_gt"})
+    )
+
+    # Run hungarian vectorized
+    tp_array, fp_array, md_array, iou_tp_array = xr.apply_ufunc(
+        _evaluate_detections_hungarian_arrays,
+        pred_bboxes_x1y1_x2y2,
+        gt_bboxes_x1y1_x2y2,
+        kwargs={"iou_threshold": iou_threshold},
+        input_core_dims=[["id", "space"], ["id_gt", "space"]],
+        output_core_dims=[["id"], ["id"], ["id_gt"], ["id"]],
+        vectorize=True,
+        exclude_dims={"id", "id_gt"},
+    )
+
+    # Add to datasets
+    pred_bboxes_ds["tp"] = xr.DataArray(tp_array, dims=["image_id", "id"])
+    pred_bboxes_ds["fp"] = xr.DataArray(fp_array, dims=["image_id", "id"])
+    pred_bboxes_ds["iou_tp"] = xr.DataArray(
+        iou_tp_array, dims=["image_id", "id"]
+    )
+
+    # rename id dimension in md_array
+    md_array = md_array.rename({"id_gt": "id"})
+    gt_bboxes_ds["md"] = xr.DataArray(md_array, dims=["image_id", "id"])
+    return pred_bboxes_ds, gt_bboxes_ds
+
+
+def _evaluate_detections_hungarian_arrays(
+    pred_bboxes: np.ndarray, gt_bboxes: np.ndarray, iou_threshold: float
+) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+    """Compute true positives, false positives, and missed detections.
+
+    Uses Hungarian algorithm for matching and takes arrays of bboxes as input
+    in x1y1x2y2 format.
+
+    Parameters
+    ----------
+    pred_bboxes : np.ndarray
+        An array of prediction bounding boxes with the first four columns being
+        the coordinates of the bounding box in the format [x1, y1, x2, y2]
+    gt_bboxes : np.ndarray
+        An array of ground truth bounding boxes with the first four columns
+        being the coordinates of the bounding box in the format
+        [x1, y1, x2, y2]
+    iou_threshold : float
+        IoU threshold for considering a detection as true positive
+
+    Returns
+    -------
+    tuple
+        A tuple of four arrays, in this order:
+        - true_positives: True for each predicted bbox that is a true positive
+        - false_positives: True for each predicted bbox that is a false
+        positive
+        - missed_detections: True for each ground truth bbox that is missed
+        - true_positives_iou: IoU of each true positive
+        missed_detections has one element per row of gt_bboxes; the other
+        three have one element per row of pred_bboxes.
+
+    Notes
+    -----
+    Rows that contain nan are treated as padding and excluded from the
+    matching. Each output is aligned row by row with its input array:
+    padding rows are False in the boolean outputs and nan in
+    true_positives_iou, wherever they sit in the input. A False in
+    true_positives therefore does not necessarily mean that the row is a
+    false positive. In true_positives_iou, valid predictions that are not
+    true positives are 0.
+
+    """
+    # Rows with any nan are padding: exclude them from the matching, but
+    # keep the masks to place the results at the right rows afterwards
+    pred_is_valid = ~np.isnan(pred_bboxes).any(axis=1)
+    gt_is_valid = ~np.isnan(gt_bboxes).any(axis=1)
+    valid_pred_bboxes = pred_bboxes[pred_is_valid, :]
+    valid_gt_bboxes = gt_bboxes[gt_is_valid, :]
+    n_pred = len(valid_pred_bboxes)
+    n_gt = len(valid_gt_bboxes)
+
+    # Initialize results for the valid rows only
+    true_positives = np.zeros(n_pred, dtype=bool)
+    false_positives = np.zeros(n_pred, dtype=bool)
+    matched_gts = np.zeros(n_gt, dtype=bool)
+    missed_detections = np.zeros(n_gt, dtype=bool)  # unmatched gts
+
+    # IoU of true positives; valid predictions that are not matched keep 0
+    true_positives_iou = np.zeros(n_pred, dtype=float)
+
+    if n_pred > 0 and n_gt > 0:
+        # Compute IoU matrix (pred_bboxes x gt_bboxes). Only the first four
+        # columns of the predictions are passed as coordinates
+        pred_tensor = torch.as_tensor(valid_pred_bboxes).float()
+        gt_tensor = torch.as_tensor(valid_gt_bboxes).float()
+        iou_matrix = ops.box_iou(pred_tensor[:, :4], gt_tensor).cpu().numpy()
+
+        # Use Hungarian algorithm to find the assignment that maximizes
+        # the total IoU of the assigned pairs
+        pred_indices, gt_indices = linear_sum_assignment(
+            iou_matrix, maximize=True
+        )
+
+        # An assigned pair only counts as a true positive if its IoU is
+        # above the threshold
+        assigned_ious = iou_matrix[pred_indices, gt_indices]
+        is_match = assigned_ious > iou_threshold
+        matched_pred_indices = pred_indices[is_match]
+        matched_gt_indices = gt_indices[is_match]
+        true_positives[matched_pred_indices] = True
+        matched_gts[matched_gt_indices] = True
+        true_positives_iou[matched_pred_indices] = assigned_ious[is_match]
+
+        # Mark unmatched predictions as false positives
+        false_positives[~true_positives] = True
+
+        # Mark unmatched ground truth as missed detections
+        missed_detections[~matched_gts] = True
+
+    elif n_gt > 0:
+        # No predictions, all ground truth are missed
+        missed_detections[:] = True
+    elif n_pred > 0:
+        # No ground truth, all predictions are false positives
+        false_positives[:] = True
+
+    # Allocate outputs with the length of the inputs: padding rows are
+    # False in the boolean outputs and nan in the IoU output
+    n_pred_bboxes_padded = pred_bboxes.shape[0]
+    n_gt_bboxes_padded = gt_bboxes.shape[0]
+    true_positives_padded = np.zeros(n_pred_bboxes_padded, dtype=bool)
+    false_positives_padded = np.zeros(n_pred_bboxes_padded, dtype=bool)
+    missed_detections_padded = np.zeros(n_gt_bboxes_padded, dtype=bool)
+    true_positives_iou_padded = np.full(n_pred_bboxes_padded, np.nan)
+
+    # Write each result at the row of its bbox rather than appending the
+    # padding at the end: this keeps the outputs aligned with the inputs
+    # when nan rows are interleaved with valid ones (e.g. after masking
+    # low-confidence detections with nan)
+    true_positives_padded[pred_is_valid] = true_positives
+    false_positives_padded[pred_is_valid] = false_positives
+    true_positives_iou_padded[pred_is_valid] = true_positives_iou
+    missed_detections_padded[gt_is_valid] = missed_detections
+
+    return (
+        true_positives_padded,
+        false_positives_padded,
+        missed_detections_padded,
+        true_positives_iou_padded,
+    )
+
+
+def compute_precision_recall_ds(
+    pred_bboxes_ds: xr.Dataset,
+    gt_bboxes_ds: xr.Dataset,
+    iou_threshold: float,
+) -> tuple[xr.Dataset, xr.Dataset]:
+    """Add per-image precision and recall to the predictions dataset.
+
+    Predictions are first matched to the ground truth with
+    ``evaluate_detections_hungarian_ds``. Then, for every image,
+    precision = TP / (TP + FP) and recall = TP / (TP + MD), where MD is
+    the number of missed detections. Both datasets are returned.
+    """
+    pred_bboxes_ds, gt_bboxes_ds = evaluate_detections_hungarian_ds(
+        pred_bboxes_ds=pred_bboxes_ds,
+        gt_bboxes_ds=gt_bboxes_ds,
+        iou_threshold=iou_threshold,
+    )
+
+    # Counts per image
+    n_tp = pred_bboxes_ds.tp.sum(dim="id")
+    n_fp = pred_bboxes_ds.fp.sum(dim="id")
+    n_md = gt_bboxes_ds.md.sum(dim="id")
+
+    pred_bboxes_ds["precision"] = n_tp / (n_tp + n_fp)
+    pred_bboxes_ds["recall"] = n_tp / (n_tp + n_md)
+    return pred_bboxes_ds, gt_bboxes_ds
+
+
+def _add_bboxes_min_max_corners(ds):
+    """Add the ``xy_min`` and ``xy_max`` corner arrays to ``ds``.
+
+    Corners are ``position`` minus/plus half of ``shape``. ``ds`` is
+    modified in place and also returned.
+
+    TODO: in testing, compare to ``torchvision.ops.box_convert`` with
+    ``in_fmt="cxcywh"`` and ``out_fmt="xyxy"``.
+    """
+    half_extent = 0.5 * ds.shape
+    ds["xy_min"] = ds.position - half_extent
+    ds["xy_max"] = ds.position + half_extent
+    return ds
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index 564b8ca5..39a71dd3 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -11,12 +11,12 @@
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
 from ethology.detectors.ensembles.models import EnsembleDetector
-
-# from ethology.detectors.evaluate import compute_precision_recall_ds
-# from ethology.io.annotations import load_bboxes
+from ethology.detectors.evaluate import compute_precision_recall_ds
+from ethology.io.annotations import load_bboxes
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
+
 # Helper functions
 def create_coco_dataset(
     images_dir: str | Path,
@@ -106,6 +106,11 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Define a YAML config file for the ensemble of trained detectors
+experiment_ID = "617393114420881798"
+ml_runs_experiment_dir = (
+    Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID
+)
+last_ckpt = Path("checkpoints") / "last.ckpt"
 
 config = {
     "models": {
@@ -117,15 +122,43 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
             "weights_backbone": None,
         },
         "checkpoints": [
-            "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt",
-            "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt",
+            str(
+                ml_runs_experiment_dir
+                / "f348d9d196934073bece1b877cbc4d38"
+                / last_ckpt
+            ),  # above_0th
+            str(
+                ml_runs_experiment_dir
+                / "879d2f77e2b24adcb06b87d2fede6a04"
+                / last_ckpt
+            ),  # above_1st
+            str(
+                ml_runs_experiment_dir
+                / "75583ec227e3444ab692b99c64795325"
+                / last_ckpt
+            ),  # above_5th
+            str(
+                ml_runs_experiment_dir
+                / "4acc37206b1e4f679d535c837bee2c2f"
+                / last_ckpt
+            ),  # above_10th
+            str(
+                ml_runs_experiment_dir
+                / "fdcf88fcbcc84fbeb94b45ca6b6f8914"
+                / last_ckpt
+            ),  # above_25th
+            str(
+                ml_runs_experiment_dir
+                / "daa05ded0ea047388c9134bf044061c5"
+                / last_ckpt
+            ),  # above_50th
         ],
     },
     "fusion": {
         "method": "wbf",
         "iou_th_ensemble": 0.5,
         "skip_box_th": 0.0001,
-        "n_jobs": 2,  # workers for joblib.Parallel, n_workers should be <= number of CPU cores
+        "n_jobs": -1,  # workers for joblib.Parallel, n_workers should be <= number of CPU cores
         # "confidence_threshold_post_fusion": 0.0,
         # "max_n_detections": 300
     },
@@ -150,23 +183,46 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# # Evaluate the ensemble model
-# # - load ground truth
-# # - compute metrics
+# Remove low confidence detections
+confidence_threshold_post_fusion = 0.5
+fused_detections_ds_ = fused_detections_ds.where(
+    fused_detections_ds.confidence >= confidence_threshold_post_fusion
+)
 
-# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Evaluate the ensemble model
+# - load ground truth
+# - compute metrics
 
+gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
 
-# fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds(
-#     pred_bboxes_ds=fused_detections_ds,
-#     gt_bboxes_ds=gt_bboxes_ds,
-#     iou_threshold=0.1,  # change to 0.5?
-# )
+iou_threshold_tp = 0.25
+fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds(
+    pred_bboxes_ds=fused_detections_ds_,
+    gt_bboxes_ds=gt_bboxes_ds,
+    iou_threshold=iou_threshold_tp,
+)
 
+# All models on full August dataset, without removing low confidence detections:
+# confidence_threshold_post_fusion = 0.0
+# Precision: 0.5920
+# Recall: 0.8455
+# ---
+# confidence_threshold_post_fusion = 0.4
+# Precision: 0.8339
+# Recall: 0.7177
+# ---
+# confidence_threshold_post_fusion = 0.5
+# Precision: 0.8714
+# Recall: 0.6624
+# ---
+
+print(
+    "Ensemble model with confidence threshold post fusion: "
+    f"{confidence_threshold_post_fusion:.2f}"
+)
+print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}")
+print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}")
 
-# print(
-#     "Ensemble model with confidence threshold post fusion: "
-#     f"{ensemble_detector.config['fusion']['confidence_threshold_post_fusion']}"
-# )
-# print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}")
-# print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}")
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Plot calibration curve

From e70f70f4354be23ef4965bb50d4bad455e39035a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 14 Nov 2025 20:57:44 +0000
Subject: [PATCH 03/39] Split ensemble inference and fusion

---
 ethology/detectors/ensembles/fusion.py | 300 ++++++++++++++++++++++---
 ethology/detectors/ensembles/models.py | 163 +++++++-------
 ethology/detectors/ensembles/utils.py  |  85 +------
 examples/ensemble_of_detectors.py      |  50 ++++-
 4 files changed, 391 insertions(+), 207 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 58521a69..e230aa18 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -1,40 +1,270 @@
 """Wrappers around ensemble-boxes fusion functions."""
+
 import numpy as np
+import xarray as xr
 from ensemble_boxes import weighted_boxes_fusion
 
 
-def weighted_boxes_fusion_in_pixels(
-    image_height_width: tuple[int, int],
-    boxes_list: list[np.ndarray],
-    scores_list: list[np.ndarray],
-    labels_list: list[np.ndarray],
-    iou_thr: float,
-    skip_box_thr: float,
-):
-    """Fuse bboxes for a single image and return in pixels."""
-    # Normalize boxes using image shape
-    image_height, image_width = image_height_width
-    boxes_list = [
-        boxes
-        / np.array([image_width, image_height, image_width, image_height])
-        if len(boxes) > 0
-        else boxes
-        for boxes in boxes_list
-    ]
-
-    # Apply WBF
-    fused_boxes, fused_scores, fused_labels = weighted_boxes_fusion(
-        boxes_list,
-        scores_list,
-        labels_list,
-        iou_thr=iou_thr,
-        skip_box_thr=skip_box_thr,
-    )
-
-    # Denormalize boxes
-    # Format of returned bboxes is x1y1x2y2 in pixels like fasterrcnn
-    fused_boxes = fused_boxes * np.array(
-        [image_width, image_height, image_width, image_height]
-    )
-
-    return fused_boxes, fused_scores, fused_labels
\ No newline at end of file
+# TODO: review shapes are ok in docstring
+def _weighted_boxes_fusion_arrays(
+    position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
+    shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
+    confidence: np.ndarray,  # model, annot
+    label: np.ndarray,  # model, annot
+    image_width_height: np.ndarray,  # = np.array([4096, 2160]),
+    iou_thr_ensemble: float = 0.5,
+    skip_box_thr: float = 0.0001,
+    max_n_detections: int = 300,
+    # confidence_th_post_fusion: float = 0.7,
+) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
+    """Wrap weighted boxes fusion to receive arrays as input.
+
+    Weighted boxes fusion fused boxes of one image.
+
+    Parameters
+    ----------
+    position: np.ndarray
+        Detected positions of bounding boxes in a single image, with shape
+        2, n_annot, n_models.
+    shape: np.ndarray
+        Detected shapes of bounding boxes in a single image, with shape
+        2, n_annot, n_models.
+    confidence: np.ndarray
+        Confidence scores for each bounding box, with shape
+        n_annotations, n_models.
+    label: np.ndarray
+        Labels for each bounding box, with shape n_annotations, n_models.
+    image_width_height: np.ndarray
+        Width and height of the image, with shape 2.
+    iou_thr_ensemble: float
+        IoU threshold for detections to be considered for fusion.
+    skip_box_thr: float
+        Threshold for skipping boxes with confidence below this value.
+    max_n_detections: int
+        Fused bounding boxes arrays are padded to this total number of boxes.
+        Its value should be larger than the expected maximum number of
+        detections per image after fusing across models.
+    confidence_th_post_fusion: float
+        Threshold for removing fused detections whose confidence is below
+        this value.
+
+    Returns
+    -------
+    tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]
+        Tuple of xr.DataArrays containing the fused detections. The arrays
+        are padded to max_n_detections and contain the data for the centroid,
+        shape, confidence and label of the fused detections.
+
+    """
+    # Prepare boxes array --> (position, shape) to x1y1x2y normalised
+    bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None]
+    bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None]
+    bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2])
+    # 4, n_annot, n_models
+
+    # Get list of bboxes per model
+    # arrays need to be tall for WBF
+    n_models = bboxes_x1y1_x2y2_normalised.shape[-1]
+    list_bboxes_per_model = [
+        arr.squeeze()
+        for arr in np.split(bboxes_x1y1_x2y2_normalised, n_models, axis=-1)
+    ]
+    list_confidence_per_model = [
+        arr.squeeze() for arr in np.split(confidence, n_models, axis=-1)
+    ]
+    list_label_per_model = [
+        arr.squeeze() for arr in np.split(label, n_models, axis=-1)
+    ]
+
+    # Remove rows with nan coordinates
+    list_bboxes_per_model = [
+        arr[:, ~np.any(np.isnan(arr), axis=0)].T
+        for arr in list_bboxes_per_model
+    ]
+    list_confidence_per_model = [
+        conf_arr[: bbox_arr.shape[0]]
+        for bbox_arr, conf_arr in zip(
+            list_bboxes_per_model,
+            list_confidence_per_model,
+            strict=True,
+        )
+    ]
+    list_label_per_model = [
+        label_arr[: bbox_arr.shape[0]]
+        for bbox_arr, label_arr in zip(
+            list_bboxes_per_model,
+            list_label_per_model,
+            strict=True,
+        )
+    ]
+    # ------------------------------------
+    # Run WBF on one image
+    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
+        weighted_boxes_fusion(
+            list_bboxes_per_model,
+            list_confidence_per_model,
+            list_label_per_model,
+            iou_thr=iou_thr_ensemble,
+            skip_box_thr=skip_box_thr,
+        )
+    )
+
+    # ------------------------------------
+    # Undo boxes x1y1 x2y2 normalization
+    ensemble_x1y1_x2y2 = ensemble_x1y1_x2y2_norm * np.tile(
+        image_width_height, (1, 2)
+    )
+
+    # Combine x1y1, x2y2, scores and labels in one array
+    ensemble_x1y2_x2y2_scores_labels = np.c_[
+        ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels
+    ]
+
+    # Remove rows with nan coordinates
+    ensemble_x1y2_x2y2_scores_labels = ensemble_x1y2_x2y2_scores_labels[
+        ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1)
+    ]
+
+    # Pad combined array to max_n_detections
+    # (this is required to concatenate across image_ids
+    ensemble_x1y2_x2y2_scores_labels = np.pad(
+        ensemble_x1y2_x2y2_scores_labels,
+        (
+            (0, max_n_detections - ensemble_x1y2_x2y2_scores_labels.shape[0]),
+            (0, 0),
+        ),
+        "constant",
+        constant_values=np.nan,
+    )
+
+    # Format output as xarray dataarrays
+    centroid, shape, confidence, label = _x1y1_x2y2_as_da_tuple(
+        ensemble_x1y2_x2y2_scores_labels[:, 0:4],
+        ensemble_x1y2_x2y2_scores_labels[:, 4],
+        ensemble_x1y2_x2y2_scores_labels[:, 5],
+    )
+
+    return centroid, shape, confidence, label
+
+
+def _x1y1_x2y2_as_da_tuple(
+    x1y1_x2y2_array: np.ndarray,
+    scores_array: np.ndarray,
+    labels_array: np.ndarray,
+    id_array: np.ndarray | None = None,
+) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
+    """Reshape detections / tracks array as xarray dataset.
+
+    Input is detections array with shape [N, 4], x1y1x2y2 in pixels
+    """
+    n_detections = x1y1_x2y2_array.shape[0]
+    if id_array is None:
+        id_array = np.arange(n_detections)
+
+    # centroid dataarray
+    centroid_da = xr.DataArray(
+        data=0.5
+        * (
+            x1y1_x2y2_array[:, 0:2] + x1y1_x2y2_array[:, 2:4]
+        ).T,  # space, annot ID
+        dims=["space", "id"],
+        coords={
+            "space": ["x", "y"],
+            "id": id_array,
+        },
+    )
+
+    # shape dataarray
+    shape_da = xr.DataArray(
+        data=(
+            x1y1_x2y2_array[:, 2:4] - x1y1_x2y2_array[:, 0:2]
+        ).T,  # space, annot ID
+        dims=["space", "id"],
+        coords={
+            "space": ["x", "y"],
+            "id": id_array,
+        },
+    )
+
+    # confidence dataarray
+    confidence_da = xr.DataArray(
+        data=scores_array,
+        dims=["id"],
+        coords={"id": id_array},
+    )
+
+    # label dataarray
+    label_da = xr.DataArray(
+        data=labels_array,
+        dims=["id"],
+        coords={"id": id_array},
+    )
+
+    return centroid_da, shape_da, confidence_da, label_da
+
+
+def WBF_across_models(
+    ensemble_detections_ds: xr.Dataset,
+    image_width_height: np.ndarray,
+    iou_thr_ensemble: float = 0.5,
+    skip_box_thr: float = 0.0001,
+    max_n_detections: int = 300,
+) -> xr.Dataset:
+    """Fuse detections across models using WBF."""
+
+    wbf_kwargs = {
+        "iou_thr_ensemble": iou_thr_ensemble,
+        "skip_box_thr": skip_box_thr,
+        "max_n_detections": max_n_detections,
+        "image_width_height": image_width_height,
+    }
+
+    # Run WBF across image_id
+    centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
+        xr.apply_ufunc(
+            _weighted_boxes_fusion_arrays,
+            ensemble_detections_ds.position,  # .data array is passed
+            ensemble_detections_ds.shape,
+            ensemble_detections_ds.confidence,
+            ensemble_detections_ds.label,
+            kwargs=wbf_kwargs,
+            input_core_dims=[  # do not broadcast across these
+                ["space", "id", "model"],
+                ["space", "id", "model"],
+                ["id", "model"],
+                ["id", "model"],
+            ],
+            output_core_dims=[
+                ["space", "id"],
+                ["space", "id"],
+                ["id"],
+                ["id"],
+            ],
+            vectorize=True,
+            # loop over non-core dims (i.e. image_id);
+            # assumes function only takes arrays over core dims as input
+            exclude_dims={"id"},
+            # to allow dimensions that change size btw input and output
+        )
+    )
+
+    # Remove pad across annotations
+    centroid_fused_da = centroid_fused_da.dropna(dim="id", how="all")
+    shape_fused_da = shape_fused_da.dropna(dim="id", how="all")
+    confidence_fused_da = confidence_fused_da.dropna(dim="id", how="all")
+    label_fused_da = label_fused_da.dropna(dim="id", how="all")
+
+    # Pad labels with -1 rather than nan
+    label_fused_da = label_fused_da.fillna(-1).astype(int)
+
+    # Return a dataset
+    # FIX: why is id not a coordinate in the output dataset?
+    # FIX: order of dimensions should be image_id, space, id
+    return xr.Dataset(
+        data_vars={
+            "position": centroid_fused_da,
+            "shape": shape_fused_da,
+            "confidence": confidence_fused_da,
+            "label": label_fused_da,
+        }
+    )
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index a42ce1ef..e738cc96 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -9,14 +9,9 @@
 import torchvision.models.detection as detection_models
 import xarray as xr
 import yaml
-from joblib import Parallel, delayed
 from lightning import LightningModule
 
-from ethology.detectors.ensembles.fusion import weighted_boxes_fusion_in_pixels
-from ethology.detectors.ensembles.utils import (
-    arrays_to_ds_variables,
-    pad_to_max_first_dimension,
-)
+from ethology.detectors.ensembles.utils import pad_to_max_first_dimension
 
 
 class EnsembleDetector(LightningModule):
@@ -26,6 +21,7 @@ class EnsembleDetector(LightningModule):
     ----------
     config_file: str
         Path to the YAML config file.
+
     """
 
     def __init__(self, config_file: str | Path):
@@ -69,45 +65,6 @@ def load_models(self) -> nn.ModuleList:
             list_models.append(model)
         return nn.ModuleList(list_models)
 
-    def fuse_bboxes(self, images_batch, predictions_per_model: list[dict]):
-        """Fuse bboxes per sample in CPU in parallel."""
-        # Fuse bboxes per sample in CPU in parallel
-        # Dispatch fusion tasks to executor (non-blocking)
-        # if self.config["fusion"]["method"] == "wbf"
-
-        # n_jobs = -1 means Use ALL available CPU cores
-        # n_jobs = -2 means Use ALL available CPU cores except one
-        n_jobs = self.config["fusion"].get("n_jobs", -1)
-
-        # Parallel WBF fusion
-        batch_size = len(images_batch)
-        results_batch = Parallel(n_jobs=n_jobs)(
-            delayed(weighted_boxes_fusion_in_pixels)(
-                images_batch[i].shape[-2:],  # image height and width
-                [
-                    preds[i]["boxes"].cpu().numpy()
-                    for preds in predictions_per_model
-                ],  # same image across all models
-                [
-                    preds[i]["scores"].cpu().numpy()
-                    for preds in predictions_per_model
-                ],
-                [
-                    preds[i]["labels"].cpu().numpy()
-                    for preds in predictions_per_model
-                ],
-                self.config["fusion"]["iou_th_ensemble"],
-                self.config["fusion"]["skip_box_th"],
-            )
-            for i in range(batch_size)
-        )  # list [(bboxes, scores, labels) * batch_size]
-
-        fused_boxes_batch, fused_scores_batch, fused_labels_batch = (
-            zip(*results_batch, strict=True) if results_batch else ([], [], [])
-        )
-
-        return fused_boxes_batch, fused_scores_batch, fused_labels_batch
-
     def predict_step(self, batch, batch_idx):
         """Predict step for a single batch."""
         # ------------------------------
@@ -115,50 +72,88 @@ def predict_step(self, batch, batch_idx):
         # TODO: can I vectorize this?
         # https://docs.pytorch.org/tutorials/intermediate/ensembling.html
         images_batch, _annotations_batch = batch
-        predictions_per_model = [
+        raw_prediction_dicts_per_model = [
             model(images_batch) for model in self.list_models
         ]  # [num_models][batch_size]
 
-        # ------------------------------
-        # Fuse bboxes per sample in CPU in parallel
-        fused_boxes_batch, fused_scores_batch, fused_labels_batch = (
-            self.fuse_bboxes(images_batch, predictions_per_model)
-        )
+        # Transpose to [batch_size][num_models] for easier downstream
+        # processing
+        raw_prediction_dicts_per_sample = [
+            list(one_sample_all_models)
+            for one_sample_all_models in zip(
+                *raw_prediction_dicts_per_model, strict=True
+            )
+        ]  # [batch_size][num_models]
+
+        return raw_prediction_dicts_per_sample
+
+    def format_predictions(self) -> xr.Dataset:
+        """Format as ethology detections dataset with model axis."""
+        # Get results from trainer
+        raw_predictions_per_model = self.trainer.predict_loop.predictions
+
+        # Flatten batches
+        raw_prediction_dicts_per_sample = list(
+            chain.from_iterable(raw_predictions_per_model)
+        )  # [sample][model]
+
+        # Parse output from dicts
+        output_per_sample = {"boxes": [], "scores": [], "labels": []}
+        for ky in output_per_sample:
+            output_per_sample[ky] = [
+                [sample[m][ky] for m in range(len(self.list_models))]
+                for sample in raw_prediction_dicts_per_sample
+            ]  # [sample][model]
+
+        # Pad across models and across image_ids
+        fill_value = {"boxes": np.nan, "scores": np.nan, "labels": -1}
+        output_per_sample_padded = {ky: [] for ky in output_per_sample}
+        for ky in output_per_sample_padded:
+            output_per_sample_padded[ky] = pad_to_max_first_dimension(
+                [
+                    # pad across models
+                    np.stack(
+                        pad_to_max_first_dimension(
+                            output_one_sample, fill_value[ky]
+                        ),
+                        axis=-1,
+                    )
+                    for output_one_sample in output_per_sample[ky]
+                ],
+                fill_value[ky],
+            )
 
-        return fused_boxes_batch, fused_scores_batch, fused_labels_batch
-
-    @staticmethod
-    def format_predictions(raw_predictions):
-        """Format as ethology detections dataset."""
-        # Unzip data per batch
-        (
-            fused_boxes_per_batch,
-            fused_scores_per_batch,
-            fused_labels_per_batch,
-        ) = zip(*raw_predictions, strict=True)  # [n_batches][batch_size]
-
-        # Flatten across all batches
-        fused_boxes = list(chain.from_iterable(fused_boxes_per_batch))
-        fused_scores = list(chain.from_iterable(fused_scores_per_batch))
-        fused_labels = list(chain.from_iterable(fused_labels_per_batch))
-
-        # Pad arrays to max n of detections per image
-        fused_boxes_padded = pad_to_max_first_dimension(fused_boxes)
-        fused_scores_padded = pad_to_max_first_dimension(fused_scores)
-        fused_labels_padded = pad_to_max_first_dimension(fused_labels)
-
-        # Stack into arrays
+        # Stack and reorder dimensions
         bboxes_array = np.transpose(
-            np.stack(fused_boxes_padded), (0, -1, 1)
-        )  # image_id, space-4, id
-        scores_array = np.stack(fused_scores_padded)
-        labels_array = np.stack(fused_labels_padded)
+            np.stack(output_per_sample_padded["boxes"]),
+            (0, -2, 1, -1),
+        )
+        scores_array = np.stack(output_per_sample_padded["scores"])
+        labels_array = np.stack(output_per_sample_padded["labels"])
+        # arrays of shape (image_id, 4/1, n_max_detections, n_models)
+
+        # Compute centroid and shape arrays
+        centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4])
+        shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2]
 
-        # ------------------------------
         # Return as ethology detections dataset
-        ds_variables = arrays_to_ds_variables(
-            bboxes_array, scores_array, labels_array
+        max_n_detections = bboxes_array.shape[-2]
+        n_images = bboxes_array.shape[0]
+
+        return xr.Dataset(
+            data_vars={
+                "position": (
+                    ["image_id", "space", "id", "model"],
+                    centroid_array,
+                ),
+                "shape": (["image_id", "space", "id", "model"], shape_array),
+                "confidence": (["image_id", "id", "model"], scores_array),
+                "label": (["image_id", "id", "model"], labels_array),
+            },
+            coords={
+                "image_id": np.arange(n_images),
+                "space": ["x", "y"],
+                "id": np.arange(max_n_detections),
+                "model": np.arange(len(self.list_models)),
+            },
         )
-        detections_ds = xr.Dataset(data_vars=ds_variables)
-
-        return detections_ds
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
index f39956ed..53f24dce 100644
--- a/ethology/detectors/ensembles/utils.py
+++ b/ethology/detectors/ensembles/utils.py
@@ -1,4 +1,5 @@
 """Utility functions for reshaping outputs of ensembles of detectors."""
+
 import numpy as np
 import xarray as xr
 
@@ -10,7 +11,7 @@ def get_padding_width(array, max_n):
     return pad_width
 
 
-def pad_to_max_first_dimension(list_arrays):
+def pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
     """Pad arrays to maximum number across all arrays in the first dimension."""
     max_n_detections = max(array.shape[0] for array in list_arrays)
     list_arrays_padded = [
@@ -18,90 +19,10 @@ def pad_to_max_first_dimension(list_arrays):
             arr,
             get_padding_width(arr, max_n_detections),
             mode="constant",
-            constant_values=np.nan,
+            constant_values=fill_value,
         )
         for arr in list_arrays
     ]
     return list_arrays_padded
 
 
-def arrays_to_ds_variables(
-    bboxes_x1y1x2y2_array: np.ndarray,
-    scores_array: np.ndarray,
-    labels_array: np.ndarray,
-    id_array: np.ndarray | None = None,
-) -> dict[str, xr.DataArray]:
-    """Convert arrays to dictionary of dataset variables.
-
-    Parameters
-    ----------
-    bboxes_x1y1x2y2_array: np.ndarray
-        Array of bounding box coordinates with shape
-        [Nimages, 4, Nmax_detections], in format x1y1x2y2 in units of pixels.
-        Nmax_detections is the maximum number of detections per image.
-    scores_array: np.ndarray
-        Array of shape [Nimages, Nmax_detections]
-    labels_array: np.ndarray
-        Array of shape [Nimages, Nmax_detections]
-    id_array: np.ndarray | None, optional
-        Array of shape [Nmax_detections]. If None, will be set to
-        range(Nmax_detections).
-    """
-    n_images = bboxes_x1y1x2y2_array.shape[0]
-    n_max_detections = bboxes_x1y1x2y2_array.shape[-1]
-    if id_array is None:
-        id_array = np.arange(n_max_detections)
-
-    # centroid dataarray (x, y)
-    centroid_da = xr.DataArray(
-        data=0.5
-        * (
-            bboxes_x1y1x2y2_array[:, 0:2, :] + bboxes_x1y1x2y2_array[:, 2:4, :]
-        ), 
-        dims=["image_id", "space", "id"],
-        coords={
-            "image_id": np.arange(n_images),
-            "space": ["x", "y"],
-            "id": id_array,
-        },
-    )
-
-    # shape dataarray (width, height)
-    shape_da = xr.DataArray(
-        data=(
-            bboxes_x1y1x2y2_array[:, 2:4, :] - bboxes_x1y1x2y2_array[:, 0:2, :]
-        ),
-        dims=["image_id", "space", "id"],
-        coords={
-            "image_id": np.arange(n_images),
-            "space": ["x", "y"],
-            "id": id_array,
-        },
-    )
-
-    # confidence dataarray
-    confidence_da = xr.DataArray(
-        data=scores_array,
-        dims=["image_id", "id"],
-        coords={
-            "image_id": np.arange(n_images),
-            "id": id_array,
-        },
-    )
-
-    # label dataarray
-    label_da = xr.DataArray(
-        data=labels_array,
-        dims=["image_id", "id"],
-        coords={
-            "image_id": np.arange(n_images),
-            "id": id_array,
-        },
-    )
-
-    return {
-        "position": centroid_da,
-        "shape": shape_da,
-        "confidence": confidence_da,
-        "label": label_da,
-    }
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index 39a71dd3..144456b7 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -1,8 +1,10 @@
 # %%
 # imports
 
+from itertools import chain
 from pathlib import Path
 
+import numpy as np
 import torch
 import torchvision.transforms.v2 as transforms
 import yaml
@@ -10,6 +12,7 @@
 from torch.utils.data import DataLoader
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
+from ethology.detectors.ensembles.fusion import WBF_across_models
 from ethology.detectors.ensembles.models import EnsembleDetector
 from ethology.detectors.evaluate import compute_precision_recall_ds
 from ethology.io.annotations import load_bboxes
@@ -176,11 +179,26 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Run the ensemble of detectors on a dataset
 # Use Trainer for inference (this sets the device flexibly)
 trainer = Trainer(accelerator="gpu", devices=1, logger=False)
-raw_predictions = trainer.predict(ensemble_detector, dataloader)
+_ = trainer.predict(ensemble_detector, dataloader)
+# [batch][sample][model]- dict
 
-# format predictions as ethology detections dataset
-fused_detections_ds = ensemble_detector.format_predictions(raw_predictions)
+# Format predictions as ethology detections dataset
+# TODO: think about syntax of format_predictions (should it be instance or 
+# static method instead?)
+ensemble_detections_ds = ensemble_detector.format_predictions()
 
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Fuse detections across models
+# TODO: think whether joblib approach is more readable?
+image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] 
+
+fused_detections_ds = WBF_across_models(
+    ensemble_detections_ds,
+    image_width_height=image_width_height,
+    iou_thr_ensemble=0.5,
+    skip_box_thr=0.0001,
+    max_n_detections=300,
+)
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Remove low confidence detections
@@ -197,7 +215,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
 
 iou_threshold_tp = 0.25
-fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds(
+fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds(
     pred_bboxes_ds=fused_detections_ds_,
     gt_bboxes_ds=gt_bboxes_ds,
     iou_threshold=iou_threshold_tp,
@@ -221,8 +239,28 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
     "Ensemble model with confidence threshold post fusion: "
     f"{confidence_threshold_post_fusion:.2f}"
 )
-print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}")
-print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}")
+print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}")
+print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}")
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Plot calibration curve
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Evaluate single models
+list_detections_ds_eval = []
+for k in range(ensemble_detections_ds.sizes["model"]):   
+    detections_ds, _ = compute_precision_recall_ds(
+        pred_bboxes_ds=ensemble_detections_ds.sel(model=k),
+        gt_bboxes_ds=gt_bboxes_ds,
+        iou_threshold=iou_threshold_tp
+    )
+    list_detections_ds_eval.append(detections_ds)
+
+    print(f"Model: {k}")
+    print(f"Precision: {detections_ds.precision.mean().values:.4f}")
+    print(f"Recall: {detections_ds.recall.mean().values:.4f}")
+    print("--------------------------------")
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Visualise detections

From 7285fc7e1e61b160e1335d83c00791c53bd39288 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 18 Nov 2025 11:20:18 +0000
Subject: [PATCH 04/39] Simplify fusion module

---
 ethology/detectors/ensembles/fusion.py | 209 +++++++++++++++----------
 examples/ensemble_of_detectors.py      |  14 +-
 2 files changed, 134 insertions(+), 89 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index e230aa18..0756bf8a 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -6,7 +6,7 @@
 
 
 # TODO: review shapes are ok in docstring
-def _weighted_boxes_fusion_arrays(
+def _fuse_single_image_detections_WBF(
     position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
     shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
     confidence: np.ndarray,  # model, annot
@@ -17,9 +17,7 @@ def _weighted_boxes_fusion_arrays(
     max_n_detections: int = 300,
     # confidence_th_post_fusion: float = 0.7,
 ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Wrap weighted boxes fusion to receive arrays as input.
-
-    Weighted boxes fusion fused boxes of one image.
+    """Fuse detections across models for a single image using WBF.
 
     Parameters
     ----------
@@ -43,7 +41,7 @@ def _weighted_boxes_fusion_arrays(
     max_n_detections: int
         Fused bounding boxes arrays are padded to this total number of boxes.
         Its value should be larger than the expected maximum number of
-        detections per image after fusing across models.
+        detections per image **after** fusing across models.
     confidence_th_post_fusion: float
         Threshold for removing fused detections whose confidence is below
         this value.
@@ -56,7 +54,50 @@ def _weighted_boxes_fusion_arrays(
         shape, confidence and label of the fused detections.
 
     """
-    # Prepare boxes array --> (position, shape) to x1y1x2y normalised
+    # Prepare single image arrays for fusion
+    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
+        _preprocess_single_image_detections(
+            position, shape, confidence, label, image_width_height
+        )
+    )
+
+    # ------------------------------------
+    # Run WBF on one image
+    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
+        weighted_boxes_fusion(
+            list_bboxes_per_model,
+            list_confidence_per_model,
+            list_label_per_model,
+            iou_thr=iou_thr_ensemble,
+            skip_box_thr=skip_box_thr,
+        )
+    )
+
+    # ------------------------------------
+
+    # Format output as xarray dataarrays
+    centroid_da, shape_da, confidence_da, label_da = (
+        _postprocess_single_image_detections(
+            ensemble_x1y1_x2y2_norm,
+            ensemble_scores,
+            ensemble_labels,
+            image_width_height,
+            max_n_detections,
+        )
+    )
+
+    return centroid_da, shape_da, confidence_da, label_da
+
+
+def _preprocess_single_image_detections(
+    position: xr.DataArray,
+    shape: xr.DataArray,
+    confidence: xr.DataArray,
+    label: xr.DataArray,
+    image_width_height: np.ndarray,
+) -> list[np.ndarray]:
+    """Prepare ensemble detections on a single image for fusion."""
+    # Prepare boxes array --> position, shape arrays to x1y1x2y2 normalised
     bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None]
     bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None]
     bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2])
@@ -97,19 +138,22 @@ def _weighted_boxes_fusion_arrays(
             strict=True,
         )
     ]
-    # ------------------------------------
-    # Run WBF on one image
-    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
-        weighted_boxes_fusion(
-            list_bboxes_per_model,
-            list_confidence_per_model,
-            list_label_per_model,
-            iou_thr=iou_thr_ensemble,
-            skip_box_thr=skip_box_thr,
-        )
+
+    return (
+        list_bboxes_per_model,
+        list_confidence_per_model,
+        list_label_per_model,
     )
 
-    # ------------------------------------
+
+def _postprocess_single_image_detections(
+    ensemble_x1y1_x2y2_norm,
+    ensemble_scores,
+    ensemble_labels,
+    image_width_height,
+    max_n_detections,
+):
+    """Unnormalise, pad and format fused single-image detections as data arrays."""
     # Undo boxes x1y1 x2y2 normalization
     ensemble_x1y1_x2y2 = ensemble_x1y1_x2y2_norm * np.tile(
         image_width_height, (1, 2)
@@ -126,7 +170,7 @@ def _weighted_boxes_fusion_arrays(
     ]
 
     # Pad combined array to max_n_detections
-    # (this is required to concatenate across image_ids
+    # (this is required to concatenate across image_ids)
     ensemble_x1y2_x2y2_scores_labels = np.pad(
         ensemble_x1y2_x2y2_scores_labels,
         (
@@ -138,79 +182,83 @@ def _weighted_boxes_fusion_arrays(
     )
 
     # Format output as xarray dataarrays
-    centroid, shape, confidence, label = _x1y1_x2y2_as_da_tuple(
-        ensemble_x1y2_x2y2_scores_labels[:, 0:4],
-        ensemble_x1y2_x2y2_scores_labels[:, 4],
-        ensemble_x1y2_x2y2_scores_labels[:, 5],
+    centroid_da, shape_da, confidence_da, label_da = (
+        _single_image_detections_as_dataarrays(
+            ensemble_x1y2_x2y2_scores_labels[:, 0:4],
+            ensemble_x1y2_x2y2_scores_labels[:, 4],
+            ensemble_x1y2_x2y2_scores_labels[:, 5],
+        )
     )
 
-    return centroid, shape, confidence, label
+    return centroid_da, shape_da, confidence_da, label_da
 
 
-def _x1y1_x2y2_as_da_tuple(
+def _single_image_detections_as_dataarrays(
     x1y1_x2y2_array: np.ndarray,
     scores_array: np.ndarray,
     labels_array: np.ndarray,
     id_array: np.ndarray | None = None,
 ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Reshape detections / tracks array as xarray dataset.
-
-    Input is detections array with shape [N, 4], x1y1x2y2 in pixels
-    """
-    n_detections = x1y1_x2y2_array.shape[0]
+    """Format single image fused detections as data arrays."""
     if id_array is None:
+        n_detections = x1y1_x2y2_array.shape[0]
         id_array = np.arange(n_detections)
 
-    # centroid dataarray
-    centroid_da = xr.DataArray(
-        data=0.5
-        * (
-            x1y1_x2y2_array[:, 0:2] + x1y1_x2y2_array[:, 2:4]
-        ).T,  # space, annot ID
-        dims=["space", "id"],
-        coords={
-            "space": ["x", "y"],
-            "id": id_array,
-        },
-    )
+    # Extract bbox corner coordinates
+    x1y1, x2y2 = x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4]
 
-    # shape dataarray
-    shape_da = xr.DataArray(
-        data=(
-            x1y1_x2y2_array[:, 2:4] - x1y1_x2y2_array[:, 0:2]
-        ).T,  # space, annot ID
-        dims=["space", "id"],
-        coords={
-            "space": ["x", "y"],
-            "id": id_array,
-        },
-    )
+    # Shared coordinates
+    id_coords = {"id": id_array}
+    spatial_id_coords = {"space": ["x", "y"], **id_coords}
 
-    # confidence dataarray
-    confidence_da = xr.DataArray(
-        data=scores_array,
-        dims=["id"],
-        coords={"id": id_array},
+    # Build all DataArrays
+    return (
+        xr.DataArray(
+            (0.5 * (x1y1 + x2y2)).T,
+            dims=["space", "id"],
+            coords=spatial_id_coords,
+        ),
+        xr.DataArray(
+            (x2y2 - x1y1).T, dims=["space", "id"], coords=spatial_id_coords
+        ),
+        xr.DataArray(scores_array, dims=["id"], coords=id_coords),
+        xr.DataArray(labels_array, dims=["id"], coords=id_coords),
     )
 
-    # label dataarray
-    label_da = xr.DataArray(
-        data=labels_array,
-        dims=["id"],
-        coords={"id": id_array},
-    )
 
-    return centroid_da, shape_da, confidence_da, label_da
+def _postprocess_multi_image_fused_arrays(
+    position: xr.DataArray,
+    shape: xr.DataArray,
+    confidence: xr.DataArray,
+    label: xr.DataArray,
+) -> dict:
+    """Postprocess fused data arrays on multiple images after fusion."""
+    data_arrays = [position, shape, confidence, label]
 
+    # Remove padding across annotations
+    position_da, shape_da, confidence_da, label_da = [
+        da.dropna(dim="id", how="all") for da in data_arrays
+    ]
+
+    # Pad labels with -1 rather than nan
+    label_da = label_da.fillna(-1).astype(int)
 
-def WBF_across_models(
+    return {
+        "position": position_da,
+        "shape": shape_da,
+        "confidence": confidence_da,
+        "label": label_da,
+    }
+
+
+def fuse_ensemble_detections_WBF(
     ensemble_detections_ds: xr.Dataset,
     image_width_height: np.ndarray,
     iou_thr_ensemble: float = 0.5,
     skip_box_thr: float = 0.0001,
     max_n_detections: int = 300,
 ) -> xr.Dataset:
-    """Fuse detections across models using WBF."""
+    """Fuse ensemble detections across models using WBF."""
 
     wbf_kwargs = {
         "iou_thr_ensemble": iou_thr_ensemble,
@@ -222,7 +270,7 @@ def WBF_across_models(
     # Run WBF across image_id
     centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
         xr.apply_ufunc(
-            _weighted_boxes_fusion_arrays,
+            _fuse_single_image_detections_WBF,
             ensemble_detections_ds.position,  # .data array is passed
             ensemble_detections_ds.shape,
             ensemble_detections_ds.confidence,
@@ -248,23 +296,18 @@ def WBF_across_models(
         )
     )
 
-    # Remove pad across annotations
-    centroid_fused_da = centroid_fused_da.dropna(dim="id", how="all")
-    shape_fused_da = shape_fused_da.dropna(dim="id", how="all")
-    confidence_fused_da = confidence_fused_da.dropna(dim="id", how="all")
-    label_fused_da = label_fused_da.dropna(dim="id", how="all")
-
-    # Pad labels with -1 rather than nan
-    label_fused_da = label_fused_da.fillna(-1).astype(int)
+    # Post process data arrays
+    fused_data_arrays = {
+        "position": centroid_fused_da,
+        "shape": shape_fused_da,
+        "confidence": confidence_fused_da,
+        "label": label_fused_da,
+    }
+    fused_data_arrays = _postprocess_multi_image_fused_arrays(
+        **fused_data_arrays
+    )
 
     # Return a dataset
     # FIX: why is id not a coordinate in the output dataset?
     # FIX: order of dimensions should be image_id, space, id
-    return xr.Dataset(
-        data_vars={
-            "position": centroid_fused_da,
-            "shape": shape_fused_da,
-            "confidence": confidence_fused_da,
-            "label": label_fused_da,
-        }
-    )
+    return xr.Dataset(data_vars=fused_data_arrays)
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index 144456b7..7881bc8b 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -12,7 +12,7 @@
 from torch.utils.data import DataLoader
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
-from ethology.detectors.ensembles.fusion import WBF_across_models
+from ethology.detectors.ensembles.fusion import fuse_ensemble_detections_WBF
 from ethology.detectors.ensembles.models import EnsembleDetector
 from ethology.detectors.evaluate import compute_precision_recall_ds
 from ethology.io.annotations import load_bboxes
@@ -182,17 +182,19 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 _ = trainer.predict(ensemble_detector, dataloader)
 # [batch][sample][model]- dict
 
+
 # Format predictions as ethology detections dataset
-# TODO: think about syntax of format_predictions (should it be instance or 
+# TODO: think about syntax of format_predictions (should it be instance or
 # static method instead?)
+# Can it just be output from .predict?
 ensemble_detections_ds = ensemble_detector.format_predictions()
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Fuse detections across models
 # TODO: think whether joblib approach is more readable?
-image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] 
+image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
 
-fused_detections_ds = WBF_across_models(
+fused_detections_ds = fuse_ensemble_detections_WBF(
     ensemble_detections_ds,
     image_width_height=image_width_height,
     iou_thr_ensemble=0.5,
@@ -249,11 +251,11 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Evaluate single models
 list_detections_ds_eval = []
-for k in range(ensemble_detections_ds.sizes["model"]):   
+for k in range(ensemble_detections_ds.sizes["model"]):
     detections_ds, _ = compute_precision_recall_ds(
         pred_bboxes_ds=ensemble_detections_ds.sel(model=k),
         gt_bboxes_ds=gt_bboxes_ds,
-        iou_threshold=iou_threshold_tp
+        iou_threshold=iou_threshold_tp,
     )
     list_detections_ds_eval.append(detections_ds)
 

From 5f98e4831d2184c5fe1dbbf46564996b0af845ca Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 18 Nov 2025 13:43:53 +0000
Subject: [PATCH 05/39] Adding other fusion methods

---
 ethology/detectors/ensembles/fusion.py | 259 ++++++++++++++++---------
 ethology/detectors/ensembles/models.py |   3 +-
 examples/ensemble_of_detectors.py      | 104 ++++++++--
 3 files changed, 255 insertions(+), 111 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 0756bf8a..a32d6252 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -2,93 +2,10 @@
 
 import numpy as np
 import xarray as xr
-from ensemble_boxes import weighted_boxes_fusion
-
-
-# TODO: review shapes are ok in docstring
-def _fuse_single_image_detections_WBF(
-    position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
-    shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
-    confidence: np.ndarray,  # model, annot
-    label: np.ndarray,  # model, annot
-    image_width_height: np.ndarray,  # = np.array([4096, 2160]),
-    iou_thr_ensemble: float = 0.5,
-    skip_box_thr: float = 0.0001,
-    max_n_detections: int = 300,
-    # confidence_th_post_fusion: float = 0.7,
-) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Fuse detections across models for a single image using WBF.
-
-    Parameters
-    ----------
-    position: np.ndarray
-        Detected positions of bounding boxes in a single image, with shape
-        2, n_annot, n_models.
-    shape: np.ndarray
-        Detected shapes of bounding boxes in a single image, with shape
-        2, n_annot, n_models.
-    confidence: np.ndarray
-        Confidence scores for each bounding box, with shape
-        n_annotations, n_models.
-    label: np.ndarray
-        Labels for each bounding box, with shape n_annotations, n_models.
-    image_width_height: np.ndarray
-        Width and height of the image, with shape 2.
-    iou_thr_ensemble: float
-        IoU threshold for detections to be considered for fusion.
-    skip_box_thr: float
-        Threshold for skipping boxes with confidence below this value.
-    max_n_detections: int
-        Fused bounding boxes arrays are padded to this total number of boxes.
-        Its value should be larger than the expected maximum number of
-        detections per image **after** fusing across models.
-    confidence_th_post_fusion: float
-        Threshold for removing fused detections whose confidence is below
-        this value.
-
-    Returns
-    -------
-    tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]
-        Tuple of xr.DataArrays containing the fused detections. The arrays
-        are padded to max_n_detections and contain the data for the centroid,
-        shape, confidence and label of the fused detections.
-
-    """
-    # Prepare single image arrays for fusion
-    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
-        _preprocess_single_image_detections(
-            position, shape, confidence, label, image_width_height
-        )
-    )
-
-    # ------------------------------------
-    # Run WBF on one image
-    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
-        weighted_boxes_fusion(
-            list_bboxes_per_model,
-            list_confidence_per_model,
-            list_label_per_model,
-            iou_thr=iou_thr_ensemble,
-            skip_box_thr=skip_box_thr,
-        )
-    )
-
-    # ------------------------------------
-
-    # Format output as xarray dataarrays
-    centroid_da, shape_da, confidence_da, label_da = (
-        _postprocess_single_image_detections(
-            ensemble_x1y1_x2y2_norm,
-            ensemble_scores,
-            ensemble_labels,
-            image_width_height,
-            max_n_detections,
-        )
-    )
-
-    return centroid_da, shape_da, confidence_da, label_da
+from ensemble_boxes import weighted_boxes_fusion, nms
 
 
+# ----------- Helper functions ---------------------------
 def _preprocess_single_image_detections(
     position: xr.DataArray,
     shape: xr.DataArray,
@@ -251,22 +168,20 @@ def _postprocess_multi_image_fused_arrays(
     }
 
 
+# -------------------------------------
+
+
 def fuse_ensemble_detections_WBF(
     ensemble_detections_ds: xr.Dataset,
     image_width_height: np.ndarray,
-    iou_thr_ensemble: float = 0.5,
-    skip_box_thr: float = 0.0001,
-    max_n_detections: int = 300,
+    max_n_detections: int,
+    wbf_kwargs: dict,
+    # iou_thr_ensemble: float = 0.5,
+    # skip_box_thr: float = 0.0001,
+    # max_n_detections: int = 300,
 ) -> xr.Dataset:
     """Fuse ensemble detections across models using WBF."""
 
-    wbf_kwargs = {
-        "iou_thr_ensemble": iou_thr_ensemble,
-        "skip_box_thr": skip_box_thr,
-        "max_n_detections": max_n_detections,
-        "image_width_height": image_width_height,
-    }
-
     # Run WBF across image_id
     centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
         xr.apply_ufunc(
@@ -275,7 +190,72 @@ def fuse_ensemble_detections_WBF(
             ensemble_detections_ds.shape,
             ensemble_detections_ds.confidence,
             ensemble_detections_ds.label,
-            kwargs=wbf_kwargs,
+            kwargs={
+                "image_width_height": image_width_height,
+                "max_n_detections": max_n_detections,
+                **wbf_kwargs,
+            },
+            input_core_dims=[  # do not broadcast across these
+                ["space", "id", "model"],
+                ["space", "id", "model"],
+                ["id", "model"],
+                ["id", "model"],
+            ],
+            output_core_dims=[
+                ["space", "id"],
+                ["space", "id"],
+                ["id"],
+                ["id"],
+            ],
+            vectorize=True,
+            # loop over non-core dims (i.e. image_id);
+            # assumes function only takes arrays over core dims as input
+            exclude_dims={"id"},
+            # to allow dimensions that change size btw input and output
+        )
+    )
+
+    # Post process data arrays
+    fused_data_arrays = {
+        "position": centroid_fused_da,
+        "shape": shape_fused_da,
+        "confidence": confidence_fused_da,
+        "label": label_fused_da,
+    }
+    fused_data_arrays = _postprocess_multi_image_fused_arrays(
+        **fused_data_arrays
+    )
+
+    # Return a dataset
+    # FIX: why is id not a coordinate in the output dataset?
+    # FIX: order of dimensions should be image_id, space, id
+    return xr.Dataset(data_vars=fused_data_arrays)
+
+
+def fuse_ensemble_detections_NMS(
+    ensemble_detections_ds: xr.Dataset,
+    image_width_height: np.ndarray,
+    max_n_detections: int,
+    nms_kwargs: dict,
+    # iou_thr_ensemble: float = 0.5,
+    # skip_box_thr: float = 0.0001,
+    # max_n_detections: int = 300,
+) -> xr.Dataset:
+    """Fuse ensemble detections across models using NMS."""
+
+    # Run NMS across image_id
+    centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
+        xr.apply_ufunc(
+            _fuse_single_image_detections_NMS,
+            ensemble_detections_ds.position,  # .data array is passed
+            ensemble_detections_ds.shape,
+            ensemble_detections_ds.confidence,
+            ensemble_detections_ds.label,
+            kwargs={
+                "image_width_height": image_width_height,
+                "max_n_detections": max_n_detections,
+                **nms_kwargs,
+            },
             input_core_dims=[  # do not broadcast across these
                 ["space", "id", "model"],
                 ["space", "id", "model"],
@@ -311,3 +291,90 @@ def fuse_ensemble_detections_WBF(
     # FIX: why is id not a coordinate in the output dataset?
     # FIX: order of dimensions should be image_id, space, id
     return xr.Dataset(data_vars=fused_data_arrays)
+
+
+# --------------- Single image ---------------------------
+def _fuse_single_image_detections_WBF(
+    position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
+    shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
+    confidence: np.ndarray,  # model, annot
+    label: np.ndarray,  # model, annot
+    image_width_height: np.ndarray,  # = np.array([4096, 2160]),
+    max_n_detections: int,
+    **wbf_kwargs: dict,  # WBF only kwargs
+) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
+    """Fuse detections across models for a single image using WBF."""
+    # Prepare single image arrays for fusion
+    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
+        _preprocess_single_image_detections(
+            position, shape, confidence, label, image_width_height
+        )
+    )
+
+    # ------------------------------------
+    # Run WBF on one image
+    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
+        weighted_boxes_fusion(
+            list_bboxes_per_model,
+            list_confidence_per_model,
+            list_label_per_model,
+            **wbf_kwargs,
+        )
+    )
+
+    # ------------------------------------
+
+    # Format output as xarray dataarrays
+    centroid_da, shape_da, confidence_da, label_da = (
+        _postprocess_single_image_detections(
+            ensemble_x1y1_x2y2_norm,
+            ensemble_scores,
+            ensemble_labels,
+            image_width_height,
+            max_n_detections,
+        )
+    )
+
+    return centroid_da, shape_da, confidence_da, label_da
+
+
+def _fuse_single_image_detections_NMS(
+    position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
+    shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
+    confidence: np.ndarray,  # model, annot
+    label: np.ndarray,  # model, annot
+    image_width_height: np.ndarray,  # = np.array([4096, 2160]),
+    max_n_detections: int,
+    **nms_kwargs: dict,  # NMS only kwargs
+) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
+    """Fuse detections across models for a single image using NMS."""
+    # Prepare single image arrays for fusion
+    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
+        _preprocess_single_image_detections(
+            position, shape, confidence, label, image_width_height
+        )
+    )
+
+    # ------------------------------------
+    # Run NMS on one image
+    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = nms(
+        list_bboxes_per_model,
+        list_confidence_per_model,
+        list_label_per_model,
+        **nms_kwargs,
+    )
+
+    # ------------------------------------
+
+    # Format output as xarray dataarrays
+    centroid_da, shape_da, confidence_da, label_da = (
+        _postprocess_single_image_detections(
+            ensemble_x1y1_x2y2_norm,
+            ensemble_scores,
+            ensemble_labels,
+            image_width_height,
+            max_n_detections,
+        )
+    )
+
+    return centroid_da, shape_da, confidence_da, label_da
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index e738cc96..2fbf2835 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -87,7 +87,7 @@ def predict_step(self, batch, batch_idx):
 
         return raw_prediction_dicts_per_sample
 
-    def format_predictions(self) -> xr.Dataset:
+    def format_predictions(self, attrs: dict) -> xr.Dataset:
         """Format as ethology detections dataset with model axis."""
         # Get results from trainer
         raw_predictions_per_model = self.trainer.predict_loop.predictions
@@ -156,4 +156,5 @@ def format_predictions(self) -> xr.Dataset:
                 "id": np.arange(max_n_detections),
                 "model": np.arange(len(self.list_models)),
             },
+            attrs=attrs if attrs else {},
         )
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index 7881bc8b..da189253 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -1,22 +1,29 @@
 # %%
 # imports
 
-from itertools import chain
 from pathlib import Path
 
 import numpy as np
 import torch
 import torchvision.transforms.v2 as transforms
+import xarray as xr
 import yaml
 from lightning import Trainer
+from matplotlib import pyplot as plt
+from PIL import Image
 from torch.utils.data import DataLoader
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
-from ethology.detectors.ensembles.fusion import fuse_ensemble_detections_WBF
+from ethology.detectors.ensembles.fusion import (
+    fuse_ensemble_detections_NMS,
+    fuse_ensemble_detections_WBF,
+)
 from ethology.detectors.ensembles.models import EnsembleDetector
 from ethology.detectors.evaluate import compute_precision_recall_ds
 from ethology.io.annotations import load_bboxes
 
+# %%
+# %matplotlib widget
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 
 
@@ -71,6 +78,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Input data
 
 dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full")
+images_dir = dataset_dir / "frames"
 annotations_dir = dataset_dir / "annotations"
 annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json"
 
@@ -87,6 +95,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 
 # Create COCO dataset
 # TODO: convert from ethology detections dataset to COCO dataset
+# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
 dataset_coco = create_coco_dataset(
     images_dir=Path(dataset_dir) / "frames",
     annotations_file=annotations_file_path,
@@ -159,11 +168,13 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
     },
     "fusion": {
         "method": "wbf",
-        "iou_th_ensemble": 0.5,
-        "skip_box_th": 0.0001,
-        "n_jobs": -1,  # workers for joblib.Parallel, n_workers should be <= number of CPU cores
+        "method_kwargs": {  # arguments as in ensemble_boxes.weighted_boxes_fusion
+            "iou_thr": 0.5,  # iou threshold for the ensemble
+            "skip_box_thr": 0.0001,
+        },
+        # "n_jobs": -1,  # workers for joblib.Parallel, n_workers should be <= number of CPU cores
         # "confidence_threshold_post_fusion": 0.0,
-        # "max_n_detections": 300
+        "max_n_detections": 300,
     },
 }
 config_file = "ensemble_of_detectors.yaml"
@@ -183,25 +194,90 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # [batch][sample][model]- dict
 
 
-# Format predictions as ethology detections dataset
+#
+# Format predictions as ethology detections dataset and add attrs
 # TODO: think about syntax of format_predictions (should it be instance or
 # static method instead?)
-# Can it just be output from .predict?
-ensemble_detections_ds = ensemble_detector.format_predictions()
+# Q: Can it just be output from .predict?
+# TODO: dataloader to ethology detections dataset
+gt_bboxes_ds = load_bboxes.from_files(
+    annotations_file_path, format="COCO", images_dirs=images_dir
+)
+ensemble_detections_ds = ensemble_detector.format_predictions(
+    attrs=gt_bboxes_ds.attrs
+)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Some nice plots:
+# ensemble_detections_ds.confidence.sel(image_id=0).plot()
+# ensemble_detections_ds.confidence.sel(model=0).plot()
+for m in range(ensemble_detections_ds.sizes["model"]):  # one figure per model
+    plt.figure()
+    ensemble_detections_ds.confidence.sel(model=m).plot()
+
+
+# %%%%%%%%
+# All models predict fewer boxes and have lower avg confidence per image in
+# image_ids from 350 to 450. Let's inspect video names and images for these
+# samples.
+
+# Add video name array
+video_name = [
+    ensemble_detections_ds.map_image_id_to_filename[img_id].split("_frame")[0]
+    for img_id in ensemble_detections_ds.image_id.values
+]
+ensemble_detections_ds["video"] = xr.DataArray(video_name, dims="image_id")
+
+# which videos?
+np.unique(ensemble_detections_ds.video.sel(image_id=range(350, 450)).values)
+
+# %%%%%%
+# Visualise image
+for image_id in range(350, 450, 10):
+    image_filename = ensemble_detections_ds.map_image_id_to_filename[image_id]
+    image_path = ensemble_detections_ds.images_directories / image_filename
+
+    # img = Image.open(image_path)
+    img = plt.imread(image_path)
+
+    plt.figure()
+    plt.imshow(img)
+    plt.title(f"{image_filename}")
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Fuse detections across models
+# Fuse detections across models with WBF
 # TODO: think whether joblib approach is more readable?
 image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
 
+config_fusion = config["fusion"]
+
 fused_detections_ds = fuse_ensemble_detections_WBF(
     ensemble_detections_ds,
     image_width_height=image_width_height,
-    iou_thr_ensemble=0.5,
-    skip_box_thr=0.0001,
-    max_n_detections=300,
+    max_n_detections=config_fusion["max_n_detections"],
+    wbf_kwargs=config_fusion["method_kwargs"],
+    # should be larger than expected maximum number of detections after fusion
+    # ---- method kwargs ----
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Fuse detections across models with NMS
+
+config_fusion = config["fusion"]
+
+fused_detections_nms_ds = fuse_ensemble_detections_NMS(
+    ensemble_detections_ds,
+    image_width_height=image_width_height,
+    max_n_detections=config_fusion["max_n_detections"],
+    nms_kwargs={
+        "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
+    },
+    # should be larger than expected maximum number of detections after fusion
+    # ---- method kwargs ----
 )
 
+# fused_detections_ds = fused_detections_nms_ds
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Remove low confidence detections
 confidence_threshold_post_fusion = 0.5
@@ -214,7 +290,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # - load ground truth
 # - compute metrics
 
-gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
+# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
 
 iou_threshold_tp = 0.25
 fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds(

From fdb6901908f27f831cadb72c0bdc51e95dcea716 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Tue, 18 Nov 2025 16:43:02 +0000
Subject: [PATCH 06/39] Add general fusion functions. Add error for
 insufficient max n of detections. Add types for kwargs. Add image shape
 validation

---
 ethology/detectors/ensembles/fusion.py | 427 +++++++++++++------------
 ethology/detectors/ensembles/models.py |   4 +-
 examples/ensemble_of_detectors.py      |  37 +--
 3 files changed, 236 insertions(+), 232 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index a32d6252..59b7e64a 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -2,10 +2,171 @@
 
 import numpy as np
 import xarray as xr
-from ensemble_boxes import weighted_boxes_fusion, nms
+
+import ensemble_boxes
+
+from typing import Callable, Optional, Literal
+from functools import partial
+from typing import TypedDict, Unpack
+
+VALID_FUSION_METHODS = {  # fusion method name -> ensemble_boxes function
+    "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion,
+    "nms": ensemble_boxes.nms,
+    "soft_nms": ensemble_boxes.soft_nms,
+    "non_maximum_weighted": ensemble_boxes.non_maximum_weighted,
+}
+
+class _TypeFusionKwargs(TypedDict, total=False):
+    """Type hints for fusion method kwargs.
+
+    Parameters for methods as described in the ensemble_boxes documentation.
+    See https://github.com/ZFTurbo/Weighted-Boxes-Fusion
+    
+    Parameters
+    ----------
+    weights: list[float]
+        Weights for each model.
+    iou_thr: float
+        IoU threshold above which boxes are considered to match
+        during fusion.
+    skip_box_thr: float
+        Exclude from fusion boxes with confidence below this value.
+    sigma: float
+        Sigma for soft NMS.
+    thresh: float
+        Threshold for boxes to keep after soft NMS.
+    conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"]
+        Method to compute the confidence score of the fused detections.
+        - "avg": Average confidence score of the fused detections (default).
+        - 'box_and_model_avg': box and model wise hybrid weighted average.
+        - 'absent_model_aware_avg': weighted average that takes into account the absent model.
+    allows_overflow: bool
+        Whether to allow the confidence score of the fused detections to exceed 1.
+    """
+    weights: list[float] | None
+    iou_thr: float
+    skip_box_thr: float
+    sigma: float
+    thresh: float
+    conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"]
+    allows_overflow: bool
+
+# TODO:
+# @decorator-that-checks-output-is-a-detections-dataset
+def fuse_ensemble_detections(
+    ensemble_detections_ds: xr.Dataset,
+    fusion_method: Literal["weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"],
+    fusion_method_kwargs: Optional[dict] = None,
+    max_n_detections: int = 500,
+) -> xr.Dataset:
+    """Fuse ensemble detections across models using the selected method."""
+    # Check that 'image_shape' is defined in the dataset attributes
+    image_shape = ensemble_detections_ds.attrs.get("image_shape")
+    if image_shape is None:
+        raise KeyError(
+            "Required attribute 'image_shape' not found in the dataset attributes. "
+            "Please ensure the dataset has 'image_shape' (width, height in pixels) "
+            "in its attributes."
+        )
+    else:
+        image_width_height = _validate_image_shape(image_shape)
+
+    # Build single-image partial fusion function for the selected method
+    if fusion_method not in VALID_FUSION_METHODS:
+        raise ValueError(
+            f"Invalid fusion method: {fusion_method}. "
+            f"Valid methods are: {list(VALID_FUSION_METHODS.keys())}"
+        )
+    fusion_function = VALID_FUSION_METHODS[fusion_method]
+    _fuse_single_image_detections_partial = partial(
+        _fuse_single_image_detections, fusion_function
+    )
+
+    # Prepare kwargs for fusion function
+    if not fusion_method_kwargs:
+        fusion_method_kwargs = {}
+
+    # Run fusion across image_id using apply_ufunc
+    centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
+        xr.apply_ufunc(
+            _fuse_single_image_detections_partial,
+            ensemble_detections_ds.position,  # .data array is passed
+            ensemble_detections_ds.shape,
+            ensemble_detections_ds.confidence,
+            ensemble_detections_ds.label,
+            kwargs={
+                "image_width_height": image_width_height,
+                "max_n_detections": max_n_detections,
+                **fusion_method_kwargs,
+            },
+            input_core_dims=[  # do not broadcast across these
+                ["space", "id", "model"],   # centroid
+                ["space", "id", "model"],   # shape
+                ["id", "model"],            # confidence
+                ["id", "model"],            # label
+            ],
+            output_core_dims=[ # do not broadcast across these
+                ["space", "id"],    # centroid
+                ["space", "id"],    # shape
+                ["id"],             # confidence
+                ["id"],             # label
+            ],
+            vectorize=True,
+            # TODO: can I avoid vectorize?
+            # loop over non-core dims (i.e. image_id);
+            # assumes function only takes arrays over core dims as input
+            exclude_dims={"id"},
+            # to allow dimensions that change size between input and output
+        )
+    )
+
+    # Post process data arrays
+    fused_data_arrays = {
+        "position": centroid_fused_da,
+        "shape": shape_fused_da,
+        "confidence": confidence_fused_da,
+        "label": label_fused_da,
+    }
+    fused_data_arrays = _postprocess_multi_image_fused_arrays(
+        **fused_data_arrays
+    )
+
+    # Return a dataset
+    return xr.Dataset(data_vars=fused_data_arrays)
+
+
+def _validate_image_shape(image_shape) -> np.ndarray:
+    """Validate and convert image shape to numpy array.
+    
+    Args:
+        image_shape: Image dimensions as (width, height).
+            Should be array-like with 2 elements.
+    
+    Returns:
+        np.ndarray: Validated image shape as 1D array with 2 elements.
+    
+    Raises:
+        ValueError: If image_shape cannot be converted to a valid shape.
+    """
+    try:
+        image_shape = np.asarray(image_shape)
+    except (TypeError, ValueError) as e:
+        raise ValueError(
+            f"Cannot convert 'image_shape' to array: {e}. "
+            "Expected format: (width, height) as tuple or array-like."
+        ) from e
+    
+    # Flatten to handle (2,), (1,2) and (2,1) shapes
+    image_shape = image_shape.flatten()
+    if image_shape.shape != (2,):
+        raise ValueError(
+            f"'image_shape' must have exactly 2 elements (width, height), "
+            f"got shape {image_shape.shape}"
+        )
+    
+    return image_shape
 
 
-# ----------- Helper functions ---------------------------
 def _preprocess_single_image_detections(
     position: xr.DataArray,
     shape: xr.DataArray,
@@ -86,6 +247,15 @@ def _postprocess_single_image_detections(
         ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1)
     ]
 
+    # Check padding
+    if ensemble_x1y2_x2y2_scores_labels.shape[0] > max_n_detections:
+        raise ValueError(
+            "Insufficient padding provided. "
+            f"The estimated maximum number of detections per image was set to {max_n_detections}, "
+            f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were found in one of the images "
+            "after fusion. Please increase the maximum number of detections per image."
+        )
+
     # Pad combined array to max_n_detections
     # (this is required to concatenate across image_ids)
     ensemble_x1y2_x2y2_scores_labels = np.pad(
@@ -109,6 +279,50 @@ def _postprocess_single_image_detections(
 
     return centroid_da, shape_da, confidence_da, label_da
 
+def _fuse_single_image_detections(
+    fusion_function: Callable,
+    position,  
+    shape, 
+    confidence: np.ndarray,  
+    label: np.ndarray, 
+    image_width_height: np.ndarray, 
+    max_n_detections: int,
+    **fusion_kwargs: Unpack[_TypeFusionKwargs],  #  method-only kwargs
+) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
+    """Fuse detections across models for a single image."""
+    # Convert the single-image arrays into per-model lists for fusion
+    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
+        _preprocess_single_image_detections(
+            position, shape, confidence, label, image_width_height
+        )
+    )
+
+    # ------------------------------------
+    # Run the given fusion function on one image
+    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
+        fusion_function(
+            list_bboxes_per_model,
+            list_confidence_per_model,
+            list_label_per_model,
+            **fusion_kwargs,
+        )
+    )
+
+    # ------------------------------------
+
+    # Format the fused output as xarray dataarrays
+    centroid_da, shape_da, confidence_da, label_da = (
+        _postprocess_single_image_detections(
+            ensemble_x1y1_x2y2_norm,
+            ensemble_scores,
+            ensemble_labels,
+            image_width_height,
+            max_n_detections,
+        )
+    )
+
+    return centroid_da, shape_da, confidence_da, label_da
+
 
 def _single_image_detections_as_dataarrays(
     x1y1_x2y2_array: np.ndarray,
@@ -168,213 +382,4 @@ def _postprocess_multi_image_fused_arrays(
     }
 
 
-# -------------------------------------
-
 
-def fuse_ensemble_detections_WBF(
-    ensemble_detections_ds: xr.Dataset,
-    image_width_height: np.ndarray,
-    max_n_detections: int,
-    wbf_kwargs: dict,
-    # iou_thr_ensemble: float = 0.5,
-    # skip_box_thr: float = 0.0001,
-    # max_n_detections: int = 300,
-) -> xr.Dataset:
-    """Fuse ensemble detections across models using WBF."""
-
-    # Run WBF across image_id
-    centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
-        xr.apply_ufunc(
-            _fuse_single_image_detections_WBF,
-            ensemble_detections_ds.position,  # .data array is passed
-            ensemble_detections_ds.shape,
-            ensemble_detections_ds.confidence,
-            ensemble_detections_ds.label,
-            kwargs={
-                "image_width_height": image_width_height,
-                "max_n_detections": max_n_detections,
-                **wbf_kwargs,
-            },
-            input_core_dims=[  # do not broadcast across these
-                ["space", "id", "model"],
-                ["space", "id", "model"],
-                ["id", "model"],
-                ["id", "model"],
-            ],
-            output_core_dims=[
-                ["space", "id"],
-                ["space", "id"],
-                ["id"],
-                ["id"],
-            ],
-            vectorize=True,
-            # loop over non-core dims (i.e. image_id);
-            # assumes function only takes arrays over core dims as input
-            exclude_dims={"id"},
-            # to allow dimensions that change size btw input and output
-        )
-    )
-
-    # Post process data arrays
-    fused_data_arrays = {
-        "position": centroid_fused_da,
-        "shape": shape_fused_da,
-        "confidence": confidence_fused_da,
-        "label": label_fused_da,
-    }
-    fused_data_arrays = _postprocess_multi_image_fused_arrays(
-        **fused_data_arrays
-    )
-
-    # Return a dataset
-    # FIX: why is id not a coordinate in the output dataset?
-    # FIX: order of dimensions should be image_id, space, id
-    return xr.Dataset(data_vars=fused_data_arrays)
-
-
-def fuse_ensemble_detections_NMS(
-    ensemble_detections_ds: xr.Dataset,
-    image_width_height: np.ndarray,
-    max_n_detections: int,
-    nms_kwargs: dict,
-    # iou_thr_ensemble: float = 0.5,
-    # skip_box_thr: float = 0.0001,
-    # max_n_detections: int = 300,
-) -> xr.Dataset:
-    """Fuse ensemble detections across models using WBF."""
-
-        # Run WBF across image_id
-    centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
-        xr.apply_ufunc(
-            _fuse_single_image_detections_NMS,
-            ensemble_detections_ds.position,  # .data array is passed
-            ensemble_detections_ds.shape,
-            ensemble_detections_ds.confidence,
-            ensemble_detections_ds.label,
-            kwargs={
-                "image_width_height": image_width_height,
-                "max_n_detections": max_n_detections,
-                **nms_kwargs,
-            },
-            input_core_dims=[  # do not broadcast across these
-                ["space", "id", "model"],
-                ["space", "id", "model"],
-                ["id", "model"],
-                ["id", "model"],
-            ],
-            output_core_dims=[
-                ["space", "id"],
-                ["space", "id"],
-                ["id"],
-                ["id"],
-            ],
-            vectorize=True,
-            # loop over non-core dims (i.e. image_id);
-            # assumes function only takes arrays over core dims as input
-            exclude_dims={"id"},
-            # to allow dimensions that change size btw input and output
-        )
-    )
-
-    # Post process data arrays
-    fused_data_arrays = {
-        "position": centroid_fused_da,
-        "shape": shape_fused_da,
-        "confidence": confidence_fused_da,
-        "label": label_fused_da,
-    }
-    fused_data_arrays = _postprocess_multi_image_fused_arrays(
-        **fused_data_arrays
-    )
-
-    # Return a dataset
-    # FIX: why is id not a coordinate in the output dataset?
-    # FIX: order of dimensions should be image_id, space, id
-    return xr.Dataset(data_vars=fused_data_arrays)
-
-
-# --------------- Single image ---------------------------
-def _fuse_single_image_detections_WBF(
-    position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
-    shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
-    confidence: np.ndarray,  # model, annot
-    label: np.ndarray,  # model, annot
-    image_width_height: np.ndarray,  # = np.array([4096, 2160]),
-    max_n_detections: int,
-    **wbf_kwargs: dict,  # WBF only kwargs
-) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Fuse detections across models for a single image using WBF."""
-    # Prepare single image arrays for fusion
-    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
-        _preprocess_single_image_detections(
-            position, shape, confidence, label, image_width_height
-        )
-    )
-
-    # ------------------------------------
-    # Run WBF on one image
-    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
-        weighted_boxes_fusion(
-            list_bboxes_per_model,
-            list_confidence_per_model,
-            list_label_per_model,
-            **wbf_kwargs,
-        )
-    )
-
-    # ------------------------------------
-
-    # Format output as xarray dataarrays
-    centroid_da, shape_da, confidence_da, label_da = (
-        _postprocess_single_image_detections(
-            ensemble_x1y1_x2y2_norm,
-            ensemble_scores,
-            ensemble_labels,
-            image_width_height,
-            max_n_detections,
-        )
-    )
-
-    return centroid_da, shape_da, confidence_da, label_da
-
-
-def _fuse_single_image_detections_NMS(
-    position,  # bboxes_x1y1: np.ndarray,  # model, annot, 4
-    shape,  # bboxes_x2y2: np.ndarray,  # model, annot, 4
-    confidence: np.ndarray,  # model, annot
-    label: np.ndarray,  # model, annot
-    image_width_height: np.ndarray,  # = np.array([4096, 2160]),
-    max_n_detections: int,
-    **nms_kwargs: dict,  # NMS only kwargs
-) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Fuse detections across models for a single image using NMS."""
-    # Prepare single image arrays for fusion
-    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
-        _preprocess_single_image_detections(
-            position, shape, confidence, label, image_width_height
-        )
-    )
-
-    # ------------------------------------
-    # Run WBF on one image
-    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = nms(
-        list_bboxes_per_model,
-        list_confidence_per_model,
-        list_label_per_model,
-        **nms_kwargs,
-    )
-
-    # ------------------------------------
-
-    # Format output as xarray dataarrays
-    centroid_da, shape_da, confidence_da, label_da = (
-        _postprocess_single_image_detections(
-            ensemble_x1y1_x2y2_norm,
-            ensemble_scores,
-            ensemble_labels,
-            image_width_height,
-            max_n_detections,
-        )
-    )
-
-    return centroid_da, shape_da, confidence_da, label_da
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 2fbf2835..412b9fbc 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -87,7 +87,9 @@ def predict_step(self, batch, batch_idx):
 
         return raw_prediction_dicts_per_sample
 
-    def format_predictions(self, attrs: dict) -> xr.Dataset:
+    # TODO:
+    # @decorator-that-checks-output-is-a-detections-dataset
+    def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
         """Format as ethology detections dataset with model axis."""
         # Get results from trainer
         raw_predictions_per_model = self.trainer.predict_loop.predictions
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index da189253..71a9336f 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -14,10 +14,7 @@
 from torch.utils.data import DataLoader
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
-from ethology.detectors.ensembles.fusion import (
-    fuse_ensemble_detections_NMS,
-    fuse_ensemble_detections_WBF,
-)
+from ethology.detectors.ensembles.fusion import fuse_ensemble_detections
 from ethology.detectors.ensembles.models import EnsembleDetector
 from ethology.detectors.evaluate import compute_precision_recall_ds
 from ethology.io.annotations import load_bboxes
@@ -167,7 +164,8 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
         ],
     },
     "fusion": {
-        "method": "wbf",
+        "method": "weighted_boxes_fusion",
+        # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted"
         "method_kwargs": {  # arguments as in ensemble_boxes.weighted_boxes_fusion
             "iou_thr": 0.5,  # iou threshold for the ensemble
             "skip_box_thr": 0.0001,
@@ -191,10 +189,8 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Use Trainer for inference (this sets the device flexibly)
 trainer = Trainer(accelerator="gpu", devices=1, logger=False)
 _ = trainer.predict(ensemble_detector, dataloader)
-# [batch][sample][model]- dict
 
 
-#
 # Format predictions as ethology detections dataset and add attrs
 # TODO: think about syntax of format_predictions (should it be instance or
 # static method instead?)
@@ -249,14 +245,17 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Fuse detections across models with WBF
 # TODO: think whether joblib approach is more readable?
 image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
+ensemble_detections_ds.attrs['image_shape'] = image_width_height
 
 config_fusion = config["fusion"]
 
-fused_detections_ds = fuse_ensemble_detections_WBF(
+
+# %%
+fused_detections_ds = fuse_ensemble_detections(
     ensemble_detections_ds,
-    image_width_height=image_width_height,
-    max_n_detections=config_fusion["max_n_detections"],
-    wbf_kwargs=config_fusion["method_kwargs"],
+    fusion_method=config_fusion['method'],
+    fusion_method_kwargs=config_fusion["method_kwargs"],
+    # max_n_detections=config_fusion["max_n_detections"],
     # should be larger than expected maximum number of detections after fusion
     # ---- method kwargs ----
 )
@@ -264,20 +263,18 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Fuse detections across models with NMS
 
-config_fusion = config["fusion"]
-
-fused_detections_nms_ds = fuse_ensemble_detections_NMS(
+fused_detections_nms_ds = fuse_ensemble_detections(
     ensemble_detections_ds,
-    image_width_height=image_width_height,
-    max_n_detections=config_fusion["max_n_detections"],
-    nms_kwargs={
+    fusion_method='soft_nms',
+    fusion_method_kwargs={
         "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
+        "sigma":0.5,
+        "thresh":0.001
     },
-    # should be larger than expected maximum number of detections after fusion
-    # ---- method kwargs ----
+    max_n_detections=500
 )
 
-# fused_detections_ds = fused_detections_nms_ds
+fused_detections_ds = fused_detections_nms_ds
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Remove low confidence detections
 confidence_threshold_post_fusion = 0.5

From 7deace18c95f73946a30a7d34d27c3795126e24e Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Tue, 18 Nov 2025 16:50:31 +0000
Subject: [PATCH 07/39] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/detectors/ensembles/fusion.py | 68 ++++++++++++++------------
 ethology/detectors/ensembles/utils.py  |  3 --
 examples/ensemble_of_detectors.py      | 13 +++--
 3 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 59b7e64a..913f34c0 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -1,13 +1,12 @@
 """Wrappers around ensemble-boxes fusion functions."""
 
-import numpy as np
-import xarray as xr
+from collections.abc import Callable
+from functools import partial
+from typing import Literal, TypedDict, Unpack
 
 import ensemble_boxes
-
-from typing import Callable, Optional, Literal
-from functools import partial
-from typing import TypedDict, Unpack
+import numpy as np
+import xarray as xr
 
 VALID_FUSION_METHODS = {
     "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion,
@@ -16,18 +15,19 @@
     "non_maxium_weighted": ensemble_boxes.non_maximum_weighted,
 }
 
+
 class _TypeFusionKwargs(TypedDict, total=False):
     """Type hints for fusion method kwargs.
 
     Parameters for methods as described in the ensemble_boxes documentation.
     See https://github.com/ZFTurbo/Weighted-Boxes-Fusion
-    
+
     Parameters
     ----------
     weights: list[float]
         Weights for each model.
     iou_thr: float
-        IoU threshold for detections to be considered a true positive 
+        IoU threshold for detections to be considered a true positive
         during fusion.
     skip_box_thr: float
         Exclude from fusion boxes with confidence below this value.
@@ -42,7 +42,9 @@ class _TypeFusionKwargs(TypedDict, total=False):
         - 'absent_model_aware_avg': weighted average that takes into account the absent model.
     allows_overflow: bool
         Whether to allow the confidence score of the fused detections to exceed 1.
+
     """
+
     weights: list[float] | None
     iou_thr: float
     skip_box_thr: float
@@ -51,12 +53,15 @@ class _TypeFusionKwargs(TypedDict, total=False):
     conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"]
     allows_overflow: bool
 
+
 # TODO:
 # @decorator-that-checks-output-is-a-detections-dataset
 def fuse_ensemble_detections(
     ensemble_detections_ds: xr.Dataset,
-    fusion_method: Literal["weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"],
-    fusion_method_kwargs: Optional[dict] = None,
+    fusion_method: Literal[
+        "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"
+    ],
+    fusion_method_kwargs: dict | None = None,
     max_n_detections: int = 500,
 ) -> xr.Dataset:
     """Fuse ensemble detections across models using WBF."""
@@ -100,16 +105,16 @@ def fuse_ensemble_detections(
                 **fusion_method_kwargs,
             },
             input_core_dims=[  # do not broadcast across these
-                ["space", "id", "model"],   # centroid
-                ["space", "id", "model"],   # shape
-                ["id", "model"],            # confidence
-                ["id", "model"],            # label
+                ["space", "id", "model"],  # centroid
+                ["space", "id", "model"],  # shape
+                ["id", "model"],  # confidence
+                ["id", "model"],  # label
             ],
-            output_core_dims=[ # do not broadcast across these
-                ["space", "id"],    # centroid
-                ["space", "id"],    # shape
-                ["id"],             # confidence
-                ["id"],             # label
+            output_core_dims=[  # do not broadcast across these
+                ["space", "id"],  # centroid
+                ["space", "id"],  # shape
+                ["id"],  # confidence
+                ["id"],  # label
             ],
             vectorize=True,
             # TODO: can I avoid vectorize?
@@ -137,16 +142,17 @@ def fuse_ensemble_detections(
 
 def _validate_image_shape(image_shape) -> np.ndarray:
     """Validate and convert image shape to numpy array.
-    
+
     Args:
         image_shape: Image dimensions as (width, height).
             Should be array-like with 2 elements.
-    
+
     Returns:
         np.ndarray: Validated image shape as 1D array with 2 elements.
-    
+
     Raises:
         ValueError: If image_shape cannot be converted to a valid shape.
+
     """
     try:
         image_shape = np.asarray(image_shape)
@@ -155,7 +161,7 @@ def _validate_image_shape(image_shape) -> np.ndarray:
             f"Cannot convert 'image_shape' to array: {e}. "
             "Expected format: (width, height) as tuple or array-like."
         ) from e
-    
+
     # Flatten to handle (2,), (1,2) and (2,1) shapes
     image_shape = image_shape.flatten()
     if image_shape.shape != (2,):
@@ -163,7 +169,7 @@ def _validate_image_shape(image_shape) -> np.ndarray:
             f"'image_shape' must have exactly 2 elements (width, height), "
             f"got shape {image_shape.shape}"
         )
-    
+
     return image_shape
 
 
@@ -279,13 +285,14 @@ def _postprocess_single_image_detections(
 
     return centroid_da, shape_da, confidence_da, label_da
 
+
 def _fuse_single_image_detections(
     fusion_function: Callable,
-    position,  
-    shape, 
-    confidence: np.ndarray,  
-    label: np.ndarray, 
-    image_width_height: np.ndarray, 
+    position,
+    shape,
+    confidence: np.ndarray,
+    label: np.ndarray,
+    image_width_height: np.ndarray,
     max_n_detections: int,
     **fusion_kwargs: Unpack[_TypeFusionKwargs],  #  method-only kwargs
 ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
@@ -380,6 +387,3 @@ def _postprocess_multi_image_fused_arrays(
         "confidence": confidence_da,
         "label": label_da,
     }
-
-
-
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
index 53f24dce..75d8e907 100644
--- a/ethology/detectors/ensembles/utils.py
+++ b/ethology/detectors/ensembles/utils.py
@@ -1,7 +1,6 @@
 """Utility functions for reshaping outputs of ensembles of detectors."""
 
 import numpy as np
-import xarray as xr
 
 
 def get_padding_width(array, max_n):
@@ -24,5 +23,3 @@ def pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
         for arr in list_arrays
     ]
     return list_arrays_padded
-
-
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index 71a9336f..dcb28d1c 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -10,7 +10,6 @@
 import yaml
 from lightning import Trainer
 from matplotlib import pyplot as plt
-from PIL import Image
 from torch.utils.data import DataLoader
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
@@ -245,7 +244,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Fuse detections across models with WBF
 # TODO: think whether joblib approach is more readable?
 image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
-ensemble_detections_ds.attrs['image_shape'] = image_width_height
+ensemble_detections_ds.attrs["image_shape"] = image_width_height
 
 config_fusion = config["fusion"]
 
@@ -253,7 +252,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # %%
 fused_detections_ds = fuse_ensemble_detections(
     ensemble_detections_ds,
-    fusion_method=config_fusion['method'],
+    fusion_method=config_fusion["method"],
     fusion_method_kwargs=config_fusion["method_kwargs"],
     # max_n_detections=config_fusion["max_n_detections"],
     # should be larger than expected maximum number of detections after fusion
@@ -265,13 +264,13 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 
 fused_detections_nms_ds = fuse_ensemble_detections(
     ensemble_detections_ds,
-    fusion_method='soft_nms',
+    fusion_method="soft_nms",
     fusion_method_kwargs={
         "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
-        "sigma":0.5,
-        "thresh":0.001
+        "sigma": 0.5,
+        "thresh": 0.001,
     },
-    max_n_detections=500
+    max_n_detections=500,
 )
 
 fused_detections_ds = fused_detections_nms_ds

From d989fa1b9cb33a83f3a7c690b735f335d15ecc01 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 19 Nov 2025 11:48:21 +0000
Subject: [PATCH 08/39] Validate supported detectors. Refactor model state dict
 fetching

---
 ethology/detectors/ensembles/models.py | 80 +++++++++++++++++++-------
 1 file changed, 58 insertions(+), 22 deletions(-)

diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 412b9fbc..4622a531 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -6,10 +6,10 @@
 import numpy as np
 import torch
 import torch.nn as nn
-import torchvision.models.detection as detection_models
 import xarray as xr
 import yaml
 from lightning import LightningModule
+from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import pad_to_max_first_dimension
 
@@ -32,39 +32,75 @@ def __init__(self, config_file: str | Path):
         with open(self.config_file) as f:
             self.config = yaml.safe_load(f)
 
+        # Validate the configured model class before building any model
+        self._validate_model_class(self.config["models"]["model_class"])
+
         # Load list of models (nn.ModuleList)
-        self.list_models = self.load_models()
+        self.list_models = self._load_models()
+
+    @staticmethod
+    def _validate_model_class(model_class_str: str) -> None:
+        """Validate that the model is part of torchvision.models.detection."""
+        valid_models = set(list_models(module=detection))
+        if model_class_str not in valid_models:
+            valid_sorted = ", ".join(sorted(valid_models))
+            raise ValueError(
+                f"'{model_class_str}' is not a supported detection model. "
+                f"Valid options: {valid_sorted}"
+            )
 
-    def load_models(self) -> nn.ModuleList:
+    def _load_models(self) -> nn.ModuleList:
         """Load models from checkpoints."""
         models_config = self.config["models"]
-        model_class = getattr(detection_models, models_config["model_class"])
 
         list_models = []
         for checkpoint_path in models_config["checkpoints"]:
-            # Get model architecture and weights
-            model = model_class(**models_config["model_kwargs"])
+            # Instantiate a fresh model per checkpoint: reusing one instance
+            # would make every ensemble member share the last-loaded weights
+            model = get_model(
+                models_config["model_class"],
+                **models_config.get("model_kwargs", {}),
+            )
+            # Get checkpoint
             checkpoint = torch.load(checkpoint_path, map_location=self.device)
-            state_dict = checkpoint["state_dict"]
 
-            # Load state dict into model
-            # PyTorch Lightning saves the model with a "model."
-            # prefix in the state_dict keys if you defined self.model
-            # in your LightningModule - we remove the prefix here.
-            if any(key.startswith("model.") for key in state_dict):
-                model_state_dict = {
-                    key.replace("model.", "", 1): value
-                    for key, value in state_dict.items()
-                    if key.startswith("model.")
-                }
-            else:
-                model_state_dict = state_dict
-            model.load_state_dict(model_state_dict)
-
-            # Append to list
+            # Load state dict
+            model_state_dict = self._get_model_state_dict(checkpoint)
+            model.load_state_dict(model_state_dict, strict=True)
+
             list_models.append(model)
+
         return nn.ModuleList(list_models)
 
+    @staticmethod
+    def _get_model_state_dict(checkpoint):
+        """Return the model state dict held in a loaded checkpoint.
+
+        Raises ``ValueError`` if ``checkpoint`` is not a dict.
+        """
+        # Check type first: `in` raises TypeError on non-container objects
+        if not isinstance(checkpoint, dict):
+            raise ValueError(
+                "Checkpoint format not recognized. "
+                "Expected 'state_dict' key or dict of tensors."
+            )
+        # The checkpoint may wrap the weights or be the state dict itself
+        state_dict = checkpoint.get("state_dict", checkpoint)
+
+        # PyTorch Lightning saves the model with a "model."
+        # prefix in the state_dict keys if you defined self.model
+        # in your LightningModule - we remove the prefix here.
+        if any(key.startswith("model.") for key in state_dict):
+            model_state_dict = {
+                key.replace("model.", "", 1): value
+                for key, value in state_dict.items()
+                if key.startswith("model.")
+            }
+        else:
+            model_state_dict = state_dict
+
+        return model_state_dict
+
     def predict_step(self, batch, batch_idx):
         """Predict step for a single batch."""
         # ------------------------------

From 62b45e5b883ddee27b54df3e8a0d7c0d4445f956 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 19 Nov 2025 13:42:38 +0000
Subject: [PATCH 09/39] Rename to ValidBboxAnnotationsDataset. Add validator
 for bbox detections

---
 ethology/io/annotations/load_bboxes.py        |  6 +-
 ethology/io/annotations/save_bboxes.py        | 11 ++-
 ethology/io/annotations/validate.py           | 39 +---------
 ethology/io/detections/validate.py            | 71 +++++++++++++++++++
 ethology/io/validate.py                       | 39 ++++++++++
 .../test_io_annotations/test_validators.py    |  6 +-
 6 files changed, 122 insertions(+), 50 deletions(-)
 create mode 100644 ethology/io/detections/validate.py
 create mode 100644 ethology/io/validate.py

diff --git a/ethology/io/annotations/load_bboxes.py b/ethology/io/annotations/load_bboxes.py
index d59d0abc..5812d346 100644
--- a/ethology/io/annotations/load_bboxes.py
+++ b/ethology/io/annotations/load_bboxes.py
@@ -11,15 +11,15 @@
 from pandera.typing.pandas import DataFrame
 
 from ethology.io.annotations.validate import (
+    ValidBboxAnnotationsDataset,
     ValidBboxesDataFrame,
-    ValidBboxesDataset,
     ValidCOCO,
     ValidVIA,
-    _check_output,
 )
+from ethology.io.validate import _check_output
 
 
-@_check_output(ValidBboxesDataset)
+@_check_output(ValidBboxAnnotationsDataset)
 def from_files(
     file_paths: Path | str | list[Path | str],
     format: Literal["VIA", "COCO"],
diff --git a/ethology/io/annotations/save_bboxes.py b/ethology/io/annotations/save_bboxes.py
index bf9e09ef..0dd4b0d4 100644
--- a/ethology/io/annotations/save_bboxes.py
+++ b/ethology/io/annotations/save_bboxes.py
@@ -12,15 +12,14 @@
 from pandera.typing.pandas import DataFrame
 
 from ethology.io.annotations.validate import (
+    ValidBboxAnnotationsDataset,
     ValidBboxesDataFrameCOCO,
-    ValidBboxesDataset,
     ValidCOCO,
-    _check_input,
-    _check_output,
 )
+from ethology.io.validate import _check_input, _check_output
 
 
-@_check_input(validator=ValidBboxesDataset)
+@_check_input(validator=ValidBboxAnnotationsDataset)
 @_check_output(validator=ValidCOCO)  # check output is ethology importable
 def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path):
     """Save an ``ethology`` bounding box annotations dataset to a COCO file.
@@ -56,7 +55,7 @@ def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path):
     return output_filepath
 
 
-@_check_input(validator=ValidBboxesDataset)
+@_check_input(validator=ValidBboxAnnotationsDataset)
 @pa.check_types
 def _to_COCO_exportable_df(
     ds: xr.Dataset,
@@ -98,7 +97,7 @@ def _to_COCO_exportable_df(
     return df[cols_to_select]
 
 
-@_check_input(validator=ValidBboxesDataset)
+@_check_input(validator=ValidBboxAnnotationsDataset)
 def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
     """Get preliminary dataframe from a dataset of bounding boxes annotations.
 
diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py
index 2e00ab92..233e80d0 100644
--- a/ethology/io/annotations/validate.py
+++ b/ethology/io/annotations/validate.py
@@ -1,8 +1,6 @@
 """Validators for annotation files and datasets."""
 
 import json
-from collections.abc import Callable
-from functools import wraps
 from pathlib import Path
 
 import pandas as pd
@@ -227,7 +225,7 @@ def _file_contains_unique_image_IDs(self, attribute, value):
 
 
 @define
-class ValidBboxesDataset:
+class ValidBboxAnnotationsDataset:
     """Class for valid ``ethology`` bounding box annotations datasets.
 
     It checks that the input dataset has:
@@ -573,38 +571,3 @@ def check_idx_and_annotation_id(cls, df: pd.DataFrame) -> bool:
 
         """
         return all(df.index == df["annotation_id"])
-
-
-def _check_output(validator: type):
-    """Return a decorator that validates the output of a function."""
-
-    def decorator(function: Callable) -> Callable:
-        @wraps(function)  # to preserve function metadata
-        def wrapper(*args, **kwargs):
-            result = function(*args, **kwargs)
-            validator(result)
-            return result
-
-        return wrapper
-
-    return decorator
-
-
-def _check_input(validator: type, input_index: int = 0):
-    """Return a decorator that validates a specific input of a function.
-
-    By default, the first input is validated. If the input index is
-    larger than the number of inputs, no validation is performed.
-    """
-
-    def decorator(function: Callable) -> Callable:
-        @wraps(function)
-        def wrapper(*args, **kwargs):
-            if len(args) > input_index:
-                validator(args[input_index])
-            result = function(*args, **kwargs)
-            return result
-
-        return wrapper
-
-    return decorator
diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py
new file mode 100644
index 00000000..67075c95
--- /dev/null
+++ b/ethology/io/detections/validate.py
@@ -0,0 +1,71 @@
+"""Validators for detection datasets."""
+
+import xarray as xr
+from attrs import define, field
+
+
+@define
+class ValidBboxDetectionsDataset:
+    """Class for valid ``ethology`` bounding box detections datasets.
+
+    It checks that the input dataset has:
+
+    - ``image_id``, ``space``, ``id`` as dimensions
+    - ``position``, ``shape`` and ``confidence`` as data variables
+
+    Attributes
+    ----------
+    dataset : xarray.Dataset
+        The xarray dataset to validate.
+
+    Raises
+    ------
+    TypeError
+        If the input is not an xarray Dataset.
+    ValueError
+        If the dataset is missing required data variables or dimensions.
+
+    Notes
+    -----
+    The dataset can have other data variables and dimensions, but only the
+    required ones are checked.
+
+    """
+
+    dataset: xr.Dataset = field()
+
+    # Minimum requirements for annotations datasets holding bboxes
+    required_dims: set = field(
+        default={"image_id", "space", "id"},
+        init=False,
+    )
+    required_data_vars: set = field(
+        default={"position", "shape", "confidence"},
+        init=False,
+    )
+
+    @dataset.validator
+    def _check_dataset_type(self, attribute, value):
+        """Ensure the input is an xarray Dataset."""
+        if not isinstance(value, xr.Dataset):
+            raise TypeError(
+                f"Expected an xarray Dataset, but got {type(value)}."
+            )
+
+    @dataset.validator
+    def _check_required_data_variables(self, attribute, value):
+        """Ensure the dataset has all required data variables."""
+        missing_vars = self.required_data_vars - set(value.data_vars)
+        if missing_vars:
+            raise ValueError(
+                f"Missing required data variables: {sorted(missing_vars)}"
+            )
+
+    @dataset.validator
+    def _check_required_dimensions(self, attribute, value):
+        """Ensure the dataset has all required dimensions."""
+        missing_dims = self.required_dims - set(value.dims)
+        if missing_dims:
+            raise ValueError(
+                f"Missing required dimensions: {sorted(missing_dims)}"
+            )
diff --git a/ethology/io/validate.py b/ethology/io/validate.py
new file mode 100644
index 00000000..ca515b19
--- /dev/null
+++ b/ethology/io/validate.py
@@ -0,0 +1,39 @@
+"""Utils for validating `ethology` objects."""
+
+from collections.abc import Callable
+from functools import wraps
+
+
+def _check_output(validator: type):
+    """Return a decorator that validates the output of a function."""
+
+    def decorator(function: Callable) -> Callable:
+        @wraps(function)  # to preserve function metadata
+        def wrapper(*args, **kwargs):
+            result = function(*args, **kwargs)
+            validator(result)
+            return result
+
+        return wrapper
+
+    return decorator
+
+
+def _check_input(validator: type, input_index: int = 0):
+    """Return a decorator that validates a specific input of a function.
+
+    By default, the first input is validated. If the input index is
+    larger than the number of inputs, no validation is performed.
+    """
+
+    def decorator(function: Callable) -> Callable:
+        @wraps(function)
+        def wrapper(*args, **kwargs):
+            if len(args) > input_index:
+                validator(args[input_index])
+            result = function(*args, **kwargs)
+            return result
+
+        return wrapper
+
+    return decorator
diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_io_annotations/test_validators.py
index d054da27..3ae8e417 100644
--- a/tests/test_unit/test_io_annotations/test_validators.py
+++ b/tests/test_unit/test_io_annotations/test_validators.py
@@ -11,7 +11,7 @@
     _extract_properties_keys,
 )
 from ethology.io.annotations.validate import (
-    ValidBboxesDataset,
+    ValidBboxAnnotationsDataset,
     ValidCOCO,
     ValidVIA,
 )
@@ -557,7 +557,7 @@ def test_valid_bboxes_dataset_validation(
     expected_error_message: str,
     request: pytest.FixtureRequest,
 ):
-    """Test ValidBboxesDataset validation with various input scenarios."""
+    """Test bbox annotations dataset validator with various input scenarios."""
     # Get dataset to validate
     if isinstance(sample_dataset, str):
         dataset = request.getfixturevalue(sample_dataset)
@@ -566,7 +566,7 @@ def test_valid_bboxes_dataset_validation(
 
     # Run validation and check exception
     with expected_exception as excinfo:
-        validator = ValidBboxesDataset(dataset=dataset)
+        validator = ValidBboxAnnotationsDataset(dataset=dataset)
 
     if excinfo:
         error_msg = str(excinfo.value)

From bc43d431d661f4fd09a37fbbd1a727e329dc9ec8 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 19 Nov 2025 13:53:29 +0000
Subject: [PATCH 10/39] Add decorators to validate bbox detections dataset

---
 ethology/detectors/ensembles/fusion.py | 21 ++++++++++++++++++---
 ethology/detectors/ensembles/models.py |  9 ++++++---
 2 files changed, 24 insertions(+), 6 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 913f34c0..4d5ee7fc 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -4,10 +4,18 @@
 from functools import partial
 from typing import Literal, TypedDict, Unpack
 
+import ensemble_boxes
+from collections.abc import Callable
+from functools import partial
+from typing import Literal, TypedDict, Unpack
+
 import ensemble_boxes
 import numpy as np
 import xarray as xr
 
+from ethology.io.detections.validate import ValidBboxDetectionsDataset
+from ethology.io.validate import _check_output
+
 VALID_FUSION_METHODS = {
     "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion,
     "nms": ensemble_boxes.nms,
@@ -16,17 +24,20 @@
 }
 
 
+
 class _TypeFusionKwargs(TypedDict, total=False):
     """Type hints for fusion method kwargs.
 
     Parameters for methods as described in the ensemble_boxes documentation.
     See https://github.com/ZFTurbo/Weighted-Boxes-Fusion
 
+
     Parameters
     ----------
     weights: list[float]
         Weights for each model.
     iou_thr: float
+        IoU threshold for detections to be considered a true positive
         IoU threshold for detections to be considered a true positive
         during fusion.
     skip_box_thr: float
@@ -42,9 +53,10 @@ class _TypeFusionKwargs(TypedDict, total=False):
         - 'absent_model_aware_avg': weighted average that takes into account the absent model.
     allows_overflow: bool
         Whether to allow the confidence score of the fused detections to exceed 1.
-
+    
     """
 
+
     weights: list[float] | None
     iou_thr: float
     skip_box_thr: float
@@ -54,14 +66,17 @@ class _TypeFusionKwargs(TypedDict, total=False):
     allows_overflow: bool
 
 
-# TODO:
-# @decorator-that-checks-output-is-a-detections-dataset
+@_check_output(ValidBboxDetectionsDataset)
 def fuse_ensemble_detections(
     ensemble_detections_ds: xr.Dataset,
     fusion_method: Literal[
         "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"
     ],
     fusion_method_kwargs: dict | None = None,
+    fusion_method: Literal[
+        "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"
+    ],
+    fusion_method_kwargs: dict | None = None,
     max_n_detections: int = 500,
 ) -> xr.Dataset:
     """Fuse ensemble detections across models using WBF."""
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 4622a531..652d4876 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -12,6 +12,8 @@
 from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import pad_to_max_first_dimension
+from ethology.io.detections.validate import ValidBboxDetectionsDataset
+from ethology.io.validate import _check_output
 
 
 class EnsembleDetector(LightningModule):
@@ -123,9 +125,10 @@ def predict_step(self, batch, batch_idx):
 
         return raw_prediction_dicts_per_sample
 
-    # TODO:
-    # @decorator-that-checks-output-is-a-detections-dataset
-    def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
+    @_check_output(ValidBboxDetectionsDataset)
+    def format_predictions(
+        self, attrs: dict | None = None
+    ) -> xr.Dataset:
         """Format as ethology detections dataset with model axis."""
         # Get results from trainer
         raw_predictions_per_model = self.trainer.predict_loop.predictions

From 1564e0b2f402f4d227d2af43d71f2cbf0838f613 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 19 Nov 2025 19:25:46 +0000
Subject: [PATCH 11/39] Add abstract base class for dataset validators

---
 ethology/io/annotations/validate.py | 38 +++----------
 ethology/io/detections/validate.py  | 39 +++-----------
 ethology/io/validate.py             | 82 +++++++++++++++++++++++++++++
 3 files changed, 97 insertions(+), 62 deletions(-)

diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py
index 233e80d0..04b81a60 100644
--- a/ethology/io/annotations/validate.py
+++ b/ethology/io/annotations/validate.py
@@ -5,7 +5,6 @@
 
 import pandas as pd
 import pandera.pandas as pa
-import xarray as xr
 from attrs import define, field
 from pandera.typing import Index
 
@@ -15,6 +14,7 @@
     _check_required_keys_in_dict,
     _get_default_schema,
 )
+from ethology.io.validate import ValidDataset
 
 
 @define
@@ -225,7 +225,7 @@ def _file_contains_unique_image_IDs(self, attribute, value):
 
 
 @define
-class ValidBboxAnnotationsDataset:
+class ValidBboxAnnotationsDataset(ValidDataset):
     """Class for valid ``ethology`` bounding box annotations datasets.
 
     It checks that the input dataset has:
@@ -237,6 +237,10 @@ class ValidBboxAnnotationsDataset:
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
+    required_dims : set
+        Set of required dimension names.
+    required_data_vars : set
+        Set of required data variable names.
 
     Raises
     ------
@@ -252,9 +256,7 @@ class ValidBboxAnnotationsDataset:
 
     """
 
-    dataset: xr.Dataset = field()
-
-    # Minimum requirements for annotations datasets holding bboxes
+    # Minimum requirements for a bbox dataset holding annotations
     required_dims: set = field(
         default={"image_id", "space", "id"},
         init=False,
@@ -264,32 +266,6 @@ class ValidBboxAnnotationsDataset:
         init=False,
     )
 
-    @dataset.validator
-    def _check_dataset_type(self, attribute, value):
-        """Ensure the input is an xarray Dataset."""
-        if not isinstance(value, xr.Dataset):
-            raise TypeError(
-                f"Expected an xarray Dataset, but got {type(value)}."
-            )
-
-    @dataset.validator
-    def _check_required_data_variables(self, attribute, value):
-        """Ensure the dataset has all required data variables."""
-        missing_vars = self.required_data_vars - set(value.data_vars)
-        if missing_vars:
-            raise ValueError(
-                f"Missing required data variables: {sorted(missing_vars)}"
-            )
-
-    @dataset.validator
-    def _check_required_dimensions(self, attribute, value):
-        """Ensure the dataset has all required dimensions."""
-        missing_dims = self.required_dims - set(value.dims)
-        if missing_dims:
-            raise ValueError(
-                f"Missing required dimensions: {sorted(missing_dims)}"
-            )
-
 
 class ValidBboxesDataFrame(pa.DataFrameModel):
     """Class for valid bounding boxes intermediate dataframes.
diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py
index 67075c95..7ef6285d 100644
--- a/ethology/io/detections/validate.py
+++ b/ethology/io/detections/validate.py
@@ -1,11 +1,12 @@
 """Validators for detection datasets."""
 
-import xarray as xr
 from attrs import define, field
 
+from ethology.io.validate import ValidDataset
+
 
 @define
-class ValidBboxDetectionsDataset:
+class ValidBboxDetectionsDataset(ValidDataset):
     """Class for valid ``ethology`` bounding box detections datasets.
 
     It checks that the input dataset has:
@@ -17,6 +18,10 @@ class ValidBboxDetectionsDataset:
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
+    required_dims : set
+        Set of required dimension names.
+    required_data_vars : set
+        Set of required data variable names.
 
     Raises
     ------
@@ -32,9 +37,7 @@ class ValidBboxDetectionsDataset:
 
     """
 
-    dataset: xr.Dataset = field()
-
-    # Minimum requirements for annotations datasets holding bboxes
+    # Minimum requirements for a bbox dataset holding detections
     required_dims: set = field(
         default={"image_id", "space", "id"},
         init=False,
@@ -43,29 +46,3 @@ class ValidBboxDetectionsDataset:
         default={"position", "shape", "confidence"},
         init=False,
     )
-
-    @dataset.validator
-    def _check_dataset_type(self, attribute, value):
-        """Ensure the input is an xarray Dataset."""
-        if not isinstance(value, xr.Dataset):
-            raise TypeError(
-                f"Expected an xarray Dataset, but got {type(value)}."
-            )
-
-    @dataset.validator
-    def _check_required_data_variables(self, attribute, value):
-        """Ensure the dataset has all required data variables."""
-        missing_vars = self.required_data_vars - set(value.data_vars)
-        if missing_vars:
-            raise ValueError(
-                f"Missing required data variables: {sorted(missing_vars)}"
-            )
-
-    @dataset.validator
-    def _check_required_dimensions(self, attribute, value):
-        """Ensure the dataset has all required dimensions."""
-        missing_dims = self.required_dims - set(value.dims)
-        if missing_dims:
-            raise ValueError(
-                f"Missing required dimensions: {sorted(missing_dims)}"
-            )
diff --git a/ethology/io/validate.py b/ethology/io/validate.py
index ca515b19..22c215f9 100644
--- a/ethology/io/validate.py
+++ b/ethology/io/validate.py
@@ -1,8 +1,90 @@
 """Utils for validating `ethology` objects."""
 
+from abc import ABC, abstractmethod
 from collections.abc import Callable
 from functools import wraps
 
+import xarray as xr
+from attrs import define, field
+
+
+@define
+class ValidDataset(ABC):
+    """An abstract base class for valid ``ethology`` datasets.
+
+    It checks that the input dataset has:
+
+    - required dimensions
+    - required data variables
+
+    Subclasses must define ``required_dims`` and ``required_data_vars``
+    attributes.
+
+    Attributes
+    ----------
+    dataset : xarray.Dataset
+        The xarray dataset to validate.
+    required_dims : set
+        Set of required dimension names (defined by subclasses).
+    required_data_vars : set
+        Set of required data variable names (defined by subclasses).
+
+    Raises
+    ------
+    TypeError
+        If the input is not an xarray Dataset.
+    ValueError
+        If the dataset is missing required data variables or dimensions.
+
+    Notes
+    -----
+    The dataset can have other data variables and dimensions, but only the
+    required ones are checked.
+
+    """
+
+    dataset: xr.Dataset = field()
+
+    # Subclasses should override these abstract properties
+    @property
+    @abstractmethod
+    def required_dims(self) -> set:
+        """Subclasses must provide a required_dims property."""
+        pass
+
+    @property
+    @abstractmethod
+    def required_data_vars(self) -> set:
+        """Subclasses must provide a required_data_vars property."""
+        pass
+
+    # Validators
+    @dataset.validator
+    def _check_dataset_type(self, attribute, value):
+        """Ensure the input is an xarray Dataset."""
+        if not isinstance(value, xr.Dataset):
+            raise TypeError(
+                f"Expected an xarray Dataset, but got {type(value)}."
+            )
+
+    @dataset.validator
+    def _check_required_data_variables(self, attribute, value):
+        """Ensure the dataset has all required data variables."""
+        missing_vars = self.required_data_vars - set(value.data_vars)
+        if missing_vars:
+            raise ValueError(
+                f"Missing required data variables: {sorted(missing_vars)}"
+            )
+
+    @dataset.validator
+    def _check_required_dimensions(self, attribute, value):
+        """Ensure the dataset has all required dimensions."""
+        missing_dims = self.required_dims - set(value.dims)
+        if missing_dims:
+            raise ValueError(
+                f"Missing required dimensions: {sorted(missing_dims)}"
+            )
+
 
 def _check_output(validator: type):
     """Return a decorator that validates the output of a function."""

From 329160629a7144df7c24e1389e74382bdbbd8db6 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 19 Nov 2025 19:40:59 +0000
Subject: [PATCH 12/39] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/detectors/ensembles/fusion.py | 2 +-
 ethology/detectors/ensembles/models.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 4d5ee7fc..f7cf8e61 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -53,7 +53,7 @@ class _TypeFusionKwargs(TypedDict, total=False):
         - 'absent_model_aware_avg': weighted average that takes into account the absent model.
     allows_overflow: bool
         Whether to allow the confidence score of the fused detections to exceed 1.
-    
+
     """
 
 
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 652d4876..92869d47 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -126,9 +126,7 @@ def predict_step(self, batch, batch_idx):
         return raw_prediction_dicts_per_sample
 
     @_check_output(ValidBboxDetectionsDataset)
-    def format_predictions(
-        self, attrs: dict | None = None
-    ) -> xr.Dataset:
+    def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
         """Format as ethology detections dataset with model axis."""
         # Get results from trainer
         raw_predictions_per_model = self.trainer.predict_loop.predictions

From c384340c8d47c7fdc3856ff623413407ae53939b Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 11:57:28 +0000
Subject: [PATCH 13/39] pre-commit fixes

---
 ethology/detectors/ensembles/fusion.py | 39 ++++++++++++--------------
 ethology/detectors/ensembles/models.py | 13 +++++++--
 ethology/detectors/ensembles/utils.py  |  2 +-
 3 files changed, 29 insertions(+), 25 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index f7cf8e61..a48914ac 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -4,11 +4,6 @@
 from functools import partial
 from typing import Literal, TypedDict, Unpack
 
-import ensemble_boxes
-from collections.abc import Callable
-from functools import partial
-from typing import Literal, TypedDict, Unpack
-
 import ensemble_boxes
 import numpy as np
 import xarray as xr
@@ -24,7 +19,6 @@
 }
 
 
-
 class _TypeFusionKwargs(TypedDict, total=False):
     """Type hints for fusion method kwargs.
 
@@ -48,15 +42,17 @@ class _TypeFusionKwargs(TypedDict, total=False):
         Threshold for boxes to keep after soft NMS.
     conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"]
         Method to compute the confidence score of the fused detections.
+
         - "avg": Average confidence score of the fused detections (default).
         - 'box_and_model_avg': box and model wise hybrid weighted average.
-        - 'absent_model_aware_avg': weighted average that takes into account the absent model.
+        - 'absent_model_aware_avg': weighted average that takes into account
+          the absent model.
     allows_overflow: bool
-        Whether to allow the confidence score of the fused detections to exceed 1.
+        Whether to allow the confidence score of the fused detections to
+        exceed 1.
 
     """
 
-
     weights: list[float] | None
     iou_thr: float
     skip_box_thr: float
@@ -73,10 +69,6 @@ def fuse_ensemble_detections(
         "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"
     ],
     fusion_method_kwargs: dict | None = None,
-    fusion_method: Literal[
-        "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"
-    ],
-    fusion_method_kwargs: dict | None = None,
     max_n_detections: int = 500,
 ) -> xr.Dataset:
     """Fuse ensemble detections across models using WBF."""
@@ -84,9 +76,9 @@ def fuse_ensemble_detections(
     image_shape = ensemble_detections_ds.attrs.get("image_shape")
     if image_shape is None:
         raise KeyError(
-            "Required attribute 'image_shape' not found in the dataset attributes. "
-            "Please ensure the dataset has 'image_shape' (width, height in pixels) "
-            "in its attributes."
+            "Required attribute 'image_shape' not found in the dataset "
+            "attributes. Please ensure the dataset has 'image_shape' "
+            "(width, height in pixels) in its attributes."
         )
     else:
         image_width_height = _validate_image_shape(image_shape)
@@ -194,7 +186,7 @@ def _preprocess_single_image_detections(
     confidence: xr.DataArray,
     label: xr.DataArray,
     image_width_height: np.ndarray,
-) -> list[np.ndarray]:
+) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
     """Prepare ensemble detections on a single image for fusion."""
     # Prepare boxes array --> position, shape arrays to x1y1x2y normalised
     bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None]
@@ -252,7 +244,10 @@ def _postprocess_single_image_detections(
     image_width_height,
     max_n_detections,
 ):
-    """Unnormalise, pad and format fused single-image detections as data arrays."""
+    """Postprocess fused single-image detections as dataarrays.
+
+    Unnormalise, pad and format as data arrays.
+    """
     # Undo boxes x1y1 x2y2 normalization
     ensemble_x1y1_x2y2 = ensemble_x1y1_x2y2_norm * np.tile(
         image_width_height, (1, 2)
@@ -272,9 +267,11 @@ def _postprocess_single_image_detections(
     if ensemble_x1y2_x2y2_scores_labels.shape[0] > max_n_detections:
         raise ValueError(
             "Insufficient padding provided. "
-            f"The estimated maximum number of detections per image was set to {max_n_detections}, "
-            f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were found in one of the images "
-            "after fusion. Please increase the maximum number of detections per image."
+            "The estimated maximum number of detections per image was set to "
+            f"{max_n_detections}, "
+            f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were "
+            "found in one of the images after fusion. Please increase the "
+            "maximum number of detections per image."
         )
 
     # Pad combined array to max_n_detections
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 92869d47..c21ac63b 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -27,6 +27,7 @@ class EnsembleDetector(LightningModule):
     """
 
     def __init__(self, config_file: str | Path):
+        """Initialise ensemble of detectors."""
         super().__init__()
 
         # Load config
@@ -35,7 +36,7 @@ def __init__(self, config_file: str | Path):
             self.config = yaml.safe_load(f)
 
         # Run checks
-        self._validate_model_class()
+        self._validate_model_class(self.config["models"]["model_class"])
 
         # Load list of models (nn.ModuleList)
         self.list_models = self._load_models()
@@ -137,7 +138,11 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
         )  # [sample][model]
 
         # Parse output from dicts
-        output_per_sample = {"boxes": [], "scores": [], "labels": []}
+        output_per_sample: dict[str, list] = {
+            "boxes": [],
+            "scores": [],
+            "labels": [],
+        }
         for ky in output_per_sample:
             output_per_sample[ky] = [
                 [sample[m][ky] for m in range(len(self.list_models))]
@@ -146,7 +151,9 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
 
         # Pad across models and across image_ids
         fill_value = {"boxes": np.nan, "scores": np.nan, "labels": -1}
-        output_per_sample_padded = {ky: [] for ky in output_per_sample}
+        output_per_sample_padded: dict[str, list] = {
+            ky: [] for ky in output_per_sample
+        }
         for ky in output_per_sample_padded:
             output_per_sample_padded[ky] = pad_to_max_first_dimension(
                 [
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
index 75d8e907..0ab1e2f8 100644
--- a/ethology/detectors/ensembles/utils.py
+++ b/ethology/detectors/ensembles/utils.py
@@ -11,7 +11,7 @@ def get_padding_width(array, max_n):
 
 
 def pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
-    """Pad arrays to maximum number across all arrays in the first dimension."""
+    """Pad arrays in list to maximum size of their first dimension."""
     max_n_detections = max(array.shape[0] for array in list_arrays)
     list_arrays_padded = [
         np.pad(

From 7c076a247463f65b606ca5b52fe483b336c71b98 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 12:05:30 +0000
Subject: [PATCH 14/39] Remove vectorization TODO comment

---
 ethology/detectors/ensembles/models.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index c21ac63b..a7e8ea8d 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -108,8 +108,6 @@ def predict_step(self, batch, batch_idx):
         """Predict step for a single batch."""
         # ------------------------------
         # Run all models in ensemble in GPU
-        # TODO: can I vectorize this?
-        # https://docs.pytorch.org/tutorials/intermediate/ensembling.html
         images_batch, _annotations_batch = batch
         raw_prediction_dicts_per_model = [
             model(images_batch) for model in self.list_models

From a610e2036ae486c3097482a216a267b3ac9a5393 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:04:09 +0000
Subject: [PATCH 15/39] Fix weights loading

---
 ethology/detectors/ensembles/models.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index a7e8ea8d..7ef8311f 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -54,12 +54,8 @@ def _validate_model_class(model_class_str: str) -> None:
 
     def _load_models(self) -> nn.ModuleList:
         """Load models from checkpoints."""
-        # Get model architecture
+        # Get model config
         models_config = self.config["models"]
-        model = get_model(
-            models_config["model_class"],
-            **models_config.get("model_kwargs", {}),
-        )
 
         # Load weights
         list_models = []
@@ -67,10 +63,15 @@ def _load_models(self) -> nn.ModuleList:
             # Get checkpoint
             checkpoint = torch.load(checkpoint_path, map_location=self.device)
 
-            # Load state dict
+            # Instantiate model with ckpt weights
+            model = get_model(
+                models_config["model_class"],
+                **models_config.get("model_kwargs", {}),
+            )
             model_state_dict = self._get_model_state_dict(checkpoint)
             model.load_state_dict(model_state_dict, strict=True)
 
+            # Append model to list
             list_models.append(model)
 
         return nn.ModuleList(list_models)

From c6c05eefa3997dd9bb75f4501aec916276ff4a8e Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:11:03 +0000
Subject: [PATCH 16/39] Filter low confidence predictions when evaluating
 single models in the ensemble

---
 examples/ensemble_of_detectors.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index dcb28d1c..ea81dd8f 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -245,7 +245,6 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # TODO: think whether joblib approach is more readable?
 image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
 ensemble_detections_ds.attrs["image_shape"] = image_width_height
-
 config_fusion = config["fusion"]
 
 
@@ -324,8 +323,14 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Evaluate single models
 list_detections_ds_eval = []
 for k in range(ensemble_detections_ds.sizes["model"]):
+    # filter low confidence detections (for a fairer comparison)
+    detections_one_model = ensemble_detections_ds.where(
+        ensemble_detections_ds.confidence >= confidence_threshold_post_fusion
+    ).sel(model=k)
+
+    # evaluate
     detections_ds, _ = compute_precision_recall_ds(
-        pred_bboxes_ds=ensemble_detections_ds.sel(model=k),
+        pred_bboxes_ds=detections_one_model,
         gt_bboxes_ds=gt_bboxes_ds,
         iou_threshold=iou_threshold_tp,
     )

From db5910e5693843ab49d7ae77094622f0a77d45d5 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 17:20:57 +0000
Subject: [PATCH 17/39] Rename variable

---
 ethology/detectors/ensembles/fusion.py | 22 ++++++++++------------
 1 file changed, 10 insertions(+), 12 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index a48914ac..4f46b3a4 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -254,32 +254,30 @@ def _postprocess_single_image_detections(
     )
 
     # Combine x1y1, x2y2, scores and labels in one array
-    ensemble_x1y2_x2y2_scores_labels = np.c_[
-        ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels
-    ]
+    ensemble_data = np.c_[ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels]
 
     # Remove rows with nan coordinates
-    ensemble_x1y2_x2y2_scores_labels = ensemble_x1y2_x2y2_scores_labels[
+    ensemble_data = ensemble_data[
         ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1)
     ]
 
     # Check padding
-    if ensemble_x1y2_x2y2_scores_labels.shape[0] > max_n_detections:
+    if ensemble_data.shape[0] > max_n_detections:
         raise ValueError(
             "Insufficient padding provided. "
             "The estimated maximum number of detections per image was set to "
             f"{max_n_detections}, "
-            f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were "
+            f"but {ensemble_data.shape[0]} detections were "
             "found in one of the images after fusion. Please increase the "
             "maximum number of detections per image."
         )
 
     # Pad combined array to max_n_detections
     # (this is required to concatenate across image_ids)
-    ensemble_x1y2_x2y2_scores_labels = np.pad(
-        ensemble_x1y2_x2y2_scores_labels,
+    ensemble_data = np.pad(
+        ensemble_data,
         (
-            (0, max_n_detections - ensemble_x1y2_x2y2_scores_labels.shape[0]),
+            (0, max_n_detections - ensemble_data.shape[0]),
             (0, 0),
         ),
         "constant",
@@ -289,9 +287,9 @@ def _postprocess_single_image_detections(
     # Format output as xarray dataarrays
     centroid_da, shape_da, confidence_da, label_da = (
         _single_image_detections_as_dataarrays(
-            ensemble_x1y2_x2y2_scores_labels[:, 0:4],
-            ensemble_x1y2_x2y2_scores_labels[:, 4],
-            ensemble_x1y2_x2y2_scores_labels[:, 5],
+            ensemble_data[:, 0:4],
+            ensemble_data[:, 4],
+            ensemble_data[:, 5],
         )
     )
 

From ef689897c7acae90d645a2bd1fc8a3fcd58da0e9 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 18:03:20 +0000
Subject: [PATCH 18/39] Compute upper bound for max number of detections after
 fusion automatically

---
 ethology/detectors/ensembles/fusion.py | 48 +++++++++++++++++++-------
 1 file changed, 36 insertions(+), 12 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 4f46b3a4..8f29985a 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -18,6 +18,10 @@
     "non_maxium_weighted": ensemble_boxes.non_maximum_weighted,
 }
 
+fusion_method_type = Literal[
+    "weighted_boxes_fusion", "nms", "soft_nms", "non_maxium_weighted"
+]
+
 
 class _TypeFusionKwargs(TypedDict, total=False):
     """Type hints for fusion method kwargs.
@@ -63,15 +67,22 @@ class _TypeFusionKwargs(TypedDict, total=False):
 
 
 @_check_output(ValidBboxDetectionsDataset)
-def fuse_ensemble_detections(
+def fuse_detections(
     ensemble_detections_ds: xr.Dataset,
-    fusion_method: Literal[
-        "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"
-    ],
+    fusion_method: fusion_method_type,
     fusion_method_kwargs: dict | None = None,
-    max_n_detections: int = 500,
+    max_n_detections: int | None = None,
 ) -> xr.Dataset:
-    """Fuse ensemble detections across models using WBF."""
+    """Fuse ensemble detections across models with the selected method.
+
+    You can set ``max_n_detections`` if an upper bound is known a priori to
+    reduce memory usage.
+
+    """
+    # Check if input dataset has 'model' dimension
+    if "model" not in ensemble_detections_ds.dims:
+        raise ValueError("Input dataset must have 'model' dimension. ")
+
     # Check if image_width_height defined in dataset
     image_shape = ensemble_detections_ds.attrs.get("image_shape")
     if image_shape is None:
@@ -83,6 +94,10 @@ def fuse_ensemble_detections(
     else:
         image_width_height = _validate_image_shape(image_shape)
 
+    # Compute upper bound of max_n_detections
+    if not max_n_detections:
+        max_n_detections = _estimate_max_n_detections(ensemble_detections_ds)
+
     # Build single-image partial fusion function for the selected method
     if fusion_method not in VALID_FUSION_METHODS:
         raise ValueError(
@@ -94,10 +109,6 @@ def fuse_ensemble_detections(
         _fuse_single_image_detections, fusion_function
     )
 
-    # Prepare kwargs for fusion function
-    if not fusion_method_kwargs:
-        fusion_method_kwargs = {}
-
     # Run fusion across image_id using apply_ufunc
     centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
         xr.apply_ufunc(
@@ -109,7 +120,7 @@ def fuse_ensemble_detections(
             kwargs={
                 "image_width_height": image_width_height,
                 "max_n_detections": max_n_detections,
-                **fusion_method_kwargs,
+                **(fusion_method_kwargs if fusion_method_kwargs else {}),
             },
             input_core_dims=[  # do not broadcast across these
                 ["space", "id", "model"],  # centroid
@@ -132,7 +143,7 @@ def fuse_ensemble_detections(
         )
     )
 
-    # Post process data arrays
+    # Postprocess data arrays
     fused_data_arrays = {
         "position": centroid_fused_da,
         "shape": shape_fused_da,
@@ -180,6 +191,19 @@ def _validate_image_shape(image_shape) -> np.ndarray:
     return image_shape
 
 
+def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int:
+    """Get upper bound for maximum number of boxes per image after fusion."""
+    detections_w_non_nan_position = (
+        ensemble_detections_ds.position.notnull().all(dim="space")
+    )  # True if non-nan x and y
+    return (
+        detections_w_non_nan_position.sum(dim="id")
+        .max(dim="image_id")
+        .sum()
+        .item()
+    )
+
+
 def _preprocess_single_image_detections(
     position: xr.DataArray,
     shape: xr.DataArray,

From bce1ee701e18fd612f0ee82b69492e958b931184 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 18:11:50 +0000
Subject: [PATCH 19/39] Use new validators module

---
 ethology/detectors/ensembles/fusion.py |   4 +-
 ethology/detectors/ensembles/models.py |   4 +-
 ethology/detectors/evaluate.py         |   2 +-
 ethology/io/detections/validate.py     |  48 ----------
 ethology/io/validate.py                | 121 -------------------------
 examples/ensemble_of_detectors.py      |  41 +++++----
 6 files changed, 27 insertions(+), 193 deletions(-)
 delete mode 100644 ethology/io/detections/validate.py
 delete mode 100644 ethology/io/validate.py

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 8f29985a..415f9591 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -8,8 +8,8 @@
 import numpy as np
 import xarray as xr
 
-from ethology.io.detections.validate import ValidBboxDetectionsDataset
-from ethology.io.validate import _check_output
+from ethology.validators.detections import ValidBboxDetectionsDataset
+from ethology.validators.utils import _check_output
 
 VALID_FUSION_METHODS = {
     "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion,
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 7ef8311f..2d339ba2 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -12,8 +12,8 @@
 from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import pad_to_max_first_dimension
-from ethology.io.detections.validate import ValidBboxDetectionsDataset
-from ethology.io.validate import _check_output
+from ethology.validators.detections import ValidBboxDetectionsDataset
+from ethology.validators.utils import _check_output
 
 
 class EnsembleDetector(LightningModule):
diff --git a/ethology/detectors/evaluate.py b/ethology/detectors/evaluate.py
index f991420c..1ae34104 100644
--- a/ethology/detectors/evaluate.py
+++ b/ethology/detectors/evaluate.py
@@ -81,7 +81,7 @@ def evaluate_detections_hungarian_ds(
 
 def _evaluate_detections_hungarian_arrays(
     pred_bboxes: np.ndarray, gt_bboxes: np.ndarray, iou_threshold: float
-) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
+) -> tuple[np.ndarray, ...]:
     """Compute true positives, false positives, and missed detections.
 
     Uses Hungarian algorithm for matching and takes arrays of bboxes as input
diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py
deleted file mode 100644
index 7ef6285d..00000000
--- a/ethology/io/detections/validate.py
+++ /dev/null
@@ -1,48 +0,0 @@
-"""Validators for detection datasets."""
-
-from attrs import define, field
-
-from ethology.io.validate import ValidDataset
-
-
-@define
-class ValidBboxDetectionsDataset(ValidDataset):
-    """Class for valid ``ethology`` bounding box detections datasets.
-
-    It checks that the input dataset has:
-
-    - ``image_id``, ``space``, ``id`` as dimensions
-    - ``position``, ``shape`` and ``confidence`` as data variables
-
-    Attributes
-    ----------
-    dataset : xarray.Dataset
-        The xarray dataset to validate.
-    required_dims : set
-        Set of required dimension names.
-    required_data_vars : set
-        Set of required data variable names.
-
-    Raises
-    ------
-    TypeError
-        If the input is not an xarray Dataset.
-    ValueError
-        If the dataset is missing required data variables or dimensions.
-
-    Notes
-    -----
-    The dataset can have other data variables and dimensions, but only the
-    required ones are checked.
-
-    """
-
-    # Minimum requirements for a bbox dataset holding detections
-    required_dims: set = field(
-        default={"image_id", "space", "id"},
-        init=False,
-    )
-    required_data_vars: set = field(
-        default={"position", "shape", "confidence"},
-        init=False,
-    )
diff --git a/ethology/io/validate.py b/ethology/io/validate.py
deleted file mode 100644
index 22c215f9..00000000
--- a/ethology/io/validate.py
+++ /dev/null
@@ -1,121 +0,0 @@
-"""Utils for validating `ethology` objects."""
-
-from abc import ABC, abstractmethod
-from collections.abc import Callable
-from functools import wraps
-
-import xarray as xr
-from attrs import define, field
-
-
-@define
-class ValidDataset(ABC):
-    """An abstract base class for valid ``ethology`` datasets.
-
-    It checks that the input dataset has:
-
-    - required dimensions
-    - required data variables
-
-    Subclasses must define ``required_dims`` and ``required_data_vars``
-    attributes.
-
-    Attributes
-    ----------
-    dataset : xarray.Dataset
-        The xarray dataset to validate.
-    required_dims : set
-        Set of required dimension names (defined by subclasses).
-    required_data_vars : set
-        Set of required data variable names (defined by subclasses).
-
-    Raises
-    ------
-    TypeError
-        If the input is not an xarray Dataset.
-    ValueError
-        If the dataset is missing required data variables or dimensions.
-
-    Notes
-    -----
-    The dataset can have other data variables and dimensions, but only the
-    required ones are checked.
-
-    """
-
-    dataset: xr.Dataset = field()
-
-    # Subclasses should override these abstract properties
-    @property
-    @abstractmethod
-    def required_dims(self) -> set:
-        """Subclasses must provide a required_dims property."""
-        pass
-
-    @property
-    @abstractmethod
-    def required_data_vars(self) -> set:
-        """Subclasses must provide a required_data_vars property."""
-        pass
-
-    # Validators
-    @dataset.validator
-    def _check_dataset_type(self, attribute, value):
-        """Ensure the input is an xarray Dataset."""
-        if not isinstance(value, xr.Dataset):
-            raise TypeError(
-                f"Expected an xarray Dataset, but got {type(value)}."
-            )
-
-    @dataset.validator
-    def _check_required_data_variables(self, attribute, value):
-        """Ensure the dataset has all required data variables."""
-        missing_vars = self.required_data_vars - set(value.data_vars)
-        if missing_vars:
-            raise ValueError(
-                f"Missing required data variables: {sorted(missing_vars)}"
-            )
-
-    @dataset.validator
-    def _check_required_dimensions(self, attribute, value):
-        """Ensure the dataset has all required dimensions."""
-        missing_dims = self.required_dims - set(value.dims)
-        if missing_dims:
-            raise ValueError(
-                f"Missing required dimensions: {sorted(missing_dims)}"
-            )
-
-
-def _check_output(validator: type):
-    """Return a decorator that validates the output of a function."""
-
-    def decorator(function: Callable) -> Callable:
-        @wraps(function)  # to preserve function metadata
-        def wrapper(*args, **kwargs):
-            result = function(*args, **kwargs)
-            validator(result)
-            return result
-
-        return wrapper
-
-    return decorator
-
-
-def _check_input(validator: type, input_index: int = 0):
-    """Return a decorator that validates a specific input of a function.
-
-    By default, the first input is validated. If the input index is
-    larger than the number of inputs, no validation is performed.
-    """
-
-    def decorator(function: Callable) -> Callable:
-        @wraps(function)
-        def wrapper(*args, **kwargs):
-            if len(args) > input_index:
-                validator(args[input_index])
-            result = function(*args, **kwargs)
-            return result
-
-        return wrapper
-
-    return decorator
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index ea81dd8f..11e9e862 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -1,3 +1,4 @@
+"""Evaluating ensemble of trained detectors."""
 # %%
 # imports
 
@@ -13,7 +14,7 @@
 from torch.utils.data import DataLoader
 from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
 
-from ethology.detectors.ensembles.fusion import fuse_ensemble_detections
+from ethology.detectors.ensembles.fusion import fuse_detections
 from ethology.detectors.ensembles.models import EnsembleDetector
 from ethology.detectors.evaluate import compute_precision_recall_ds
 from ethology.io.annotations import load_bboxes
@@ -165,11 +166,13 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
     "fusion": {
         "method": "weighted_boxes_fusion",
         # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted"
-        "method_kwargs": {  # arguments as in ensemble_boxes.weighted_boxes_fusion
+        "method_kwargs": {
+            # arguments as in ensemble_boxes.weighted_boxes_fusion
             "iou_thr": 0.5,  # iou threshold for the ensemble
             "skip_box_thr": 0.0001,
         },
-        # "n_jobs": -1,  # workers for joblib.Parallel, n_workers should be <= number of CPU cores
+        # "n_jobs": -1,  # workers for joblib.Parallel,
+        # n_workers should be <= number of CPU cores
         # "confidence_threshold_post_fusion": 0.0,
         "max_n_detections": 300,
     },
@@ -245,11 +248,10 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # TODO: think whether joblib approach is more readable?
 image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
 ensemble_detections_ds.attrs["image_shape"] = image_width_height
-config_fusion = config["fusion"]
+config_fusion: dict = config["fusion"]
 
 
-# %%
-fused_detections_ds = fuse_ensemble_detections(
+fused_detections_ds = fuse_detections(
     ensemble_detections_ds,
     fusion_method=config_fusion["method"],
     fusion_method_kwargs=config_fusion["method_kwargs"],
@@ -261,18 +263,18 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Fuse detections across models with NMS
 
-fused_detections_nms_ds = fuse_ensemble_detections(
-    ensemble_detections_ds,
-    fusion_method="soft_nms",
-    fusion_method_kwargs={
-        "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
-        "sigma": 0.5,
-        "thresh": 0.001,
-    },
-    max_n_detections=500,
-)
-
-fused_detections_ds = fused_detections_nms_ds
+# fused_detections_nms_ds = fuse_ensemble_detections(
+#     ensemble_detections_ds,
+#     fusion_method="soft_nms",
+#     fusion_method_kwargs={
+#         "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
+#         "sigma": 0.5,
+#         "thresh": 0.001,
+#     },
+#     max_n_detections=500,
+# )
+
+# fused_detections_ds = fused_detections_nms_ds
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Remove low confidence detections
 confidence_threshold_post_fusion = 0.5
@@ -294,7 +296,8 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
     iou_threshold=iou_threshold_tp,
 )
 
-# All models on full August dataset, without removing low confidence detections:
+# All models on full August dataset, without removing low
+# confidence detections:
 # confidence_threshold_post_fusion = 0.0
 # Precision: 0.5920
 # Recall: 0.8455

From c26af244d63ea50d5e88a4b4a980f2a2509ed66b Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 21 Nov 2025 18:21:04 +0000
Subject: [PATCH 20/39] Add an ensemble detections dataset validator

---
 ethology/detectors/ensembles/fusion.py | 13 +++---
 ethology/detectors/ensembles/models.py |  4 +-
 ethology/validators/detections.py      | 60 ++++++++++++++++++++++++++
 3 files changed, 69 insertions(+), 8 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 415f9591..d04c5d48 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -8,8 +8,11 @@
 import numpy as np
 import xarray as xr
 
-from ethology.validators.detections import ValidBboxDetectionsDataset
-from ethology.validators.utils import _check_output
+from ethology.validators.detections import (
+    ValidBboxDetectionsDataset,
+    ValidBboxDetectionsEnsembleDataset,
+)
+from ethology.validators.utils import _check_input, _check_output
 
 VALID_FUSION_METHODS = {
     "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion,
@@ -66,6 +69,7 @@ class _TypeFusionKwargs(TypedDict, total=False):
     allows_overflow: bool
 
 
+@_check_input(ValidBboxDetectionsEnsembleDataset)
 @_check_output(ValidBboxDetectionsDataset)
 def fuse_detections(
     ensemble_detections_ds: xr.Dataset,
@@ -79,10 +83,6 @@ def fuse_detections(
     reduce memory usage.
 
     """
-    # Check if input dataset has 'model' dimension
-    if "model" not in ensemble_detections_ds.dims:
-        raise ValueError("Input dataset must have 'model' dimension. ")
-
     # Check if image_width_height defined in dataset
     image_shape = ensemble_detections_ds.attrs.get("image_shape")
     if image_shape is None:
@@ -191,6 +191,7 @@ def _validate_image_shape(image_shape) -> np.ndarray:
     return image_shape
 
 
+@_check_input(ValidBboxDetectionsEnsembleDataset)
 def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int:
     """Get upper bound for maximum number of boxes per image after fusion."""
     detections_w_non_nan_position = (
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 2d339ba2..7545c4b9 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -12,7 +12,7 @@
 from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import pad_to_max_first_dimension
-from ethology.validators.detections import ValidBboxDetectionsDataset
+from ethology.validators.detections import ValidBboxDetectionsEnsembleDataset
 from ethology.validators.utils import _check_output
 
 
@@ -125,7 +125,7 @@ def predict_step(self, batch, batch_idx):
 
         return raw_prediction_dicts_per_sample
 
-    @_check_output(ValidBboxDetectionsDataset)
+    @_check_output(ValidBboxDetectionsEnsembleDataset)
     def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
         """Format as ethology detections dataset with model axis."""
         # Get results from trainer
diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py
index 1f6d9df6..a22dab62 100644
--- a/ethology/validators/detections.py
+++ b/ethology/validators/detections.py
@@ -62,3 +62,63 @@ class ValidBboxDetectionsDataset(ValidDataset):
         },
         init=False,
     )
+
+
+@define
+class ValidBboxDetectionsEnsembleDataset(ValidDataset):
+    """Class for valid ``ethology`` bounding box ensemble detections datasets.
+
+    This class validates that the input dataset:
+
+    - is an xarray Dataset,
+    - has ``image_id``, ``space``, ``id`` and ``model`` as dimensions,
+    - has ``position``, ``shape`` and ``confidence`` as data variables,
+    - ``position`` and ``shape`` span at least the dimensions ``image_id``,
+      ``space``, ``id`` and ``model``,
+    - ``confidence`` spans at least the dimensions ``image_id``, ``id``
+      and ``model``.
+
+
+    Attributes
+    ----------
+    dataset : xarray.Dataset
+        The xarray dataset to validate.
+    required_dims : set
+        The set of required dimension names: ``image_id``, ``space``, ``id``
+        and ``model``.
+    required_data_vars : dict[str, set]
+        A dictionary mapping data variable names to their required minimum
+        dimensions:
+
+        - ``position`` maps to ``image_id``, ``space``, ``id`` and ``model``,
+        - ``shape`` maps to ``image_id``, ``space``, ``id`` and ``model``,
+        - ``confidence`` maps to ``image_id``, ``id`` and ``model``.
+
+    Raises
+    ------
+    TypeError
+        If the input is not an xarray Dataset.
+    ValueError
+        If the dataset is missing required data variables or dimensions,
+        or if any required dimensions are missing for any data variable.
+
+    Notes
+    -----
+    The dataset can have other data variables and dimensions, but only the
+    required ones are checked.
+
+    """
+
+    # Minimum requirements for a bbox dataset holding ensemble detections
+    required_dims: set = field(
+        default={"image_id", "space", "id", "model"},
+        init=False,
+    )
+    required_data_vars: dict = field(
+        default={
+            "position": {"image_id", "space", "id", "model"},
+            "shape": {"image_id", "space", "id", "model"},
+            "confidence": {"image_id", "id", "model"},
+        },
+        init=False,
+    )

From 4440c12b3f813c8d0f01aa5d1bc68a577f6bd622 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 3 Dec 2025 12:55:52 +0000
Subject: [PATCH 21/39] Fix output validator

---
 ethology/detectors/ensembles/fusion.py | 25 +++++++++++++------------
 ethology/detectors/ensembles/models.py |  1 -
 examples/ensemble_of_detectors.py      |  8 ++++++--
 3 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index d04c5d48..3af74e52 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -144,14 +144,11 @@ def fuse_detections(
     )
 
     # Postprocess data arrays
-    fused_data_arrays = {
-        "position": centroid_fused_da,
-        "shape": shape_fused_da,
-        "confidence": confidence_fused_da,
-        "label": label_fused_da,
-    }
     fused_data_arrays = _postprocess_multi_image_fused_arrays(
-        **fused_data_arrays
+        position = centroid_fused_da,
+        shape = shape_fused_da,
+        confidence = confidence_fused_da,
+        label = label_fused_da,
     )
 
     # Return a dataset
@@ -408,7 +405,7 @@ def _postprocess_multi_image_fused_arrays(
     """Postprocess fused data arrays on multiple images after fusion."""
     data_arrays = [position, shape, confidence, label]
 
-    # Remove padding across annotations
+    # Remove extra padding across annotations
     position_da, shape_da, confidence_da, label_da = [
         da.dropna(dim="id", how="all") for da in data_arrays
     ]
@@ -416,9 +413,13 @@ def _postprocess_multi_image_fused_arrays(
     # Pad labels with -1 rather than nan
     label_da = label_da.fillna(-1).astype(int)
 
+    # Assign id coordinates to data arrays 
+    # (these are lost after apply_ufunc because exclude_dims is used)
+    n_max_detections = position_da.sizes["id"]
+    id_coords = np.arange(n_max_detections)
     return {
-        "position": position_da,
-        "shape": shape_da,
-        "confidence": confidence_da,
-        "label": label_da,
+        "position": position_da.assign_coords(id=id_coords),
+        "shape": shape_da.assign_coords(id=id_coords),
+        "confidence": confidence_da.assign_coords(id=id_coords),
+        "label": label_da.assign_coords(id=id_coords),
     }
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 7545c4b9..03d20211 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -107,7 +107,6 @@ def _get_model_state_dict(checkpoint):
 
     def predict_step(self, batch, batch_idx):
         """Predict step for a single batch."""
-        # ------------------------------
         # Run all models in ensemble in GPU
         images_batch, _annotations_batch = batch
         raw_prediction_dicts_per_model = [
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index 11e9e862..e815b26b 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -210,7 +210,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # Some nice plots:
 # ensemble_detections_ds.confidence.sel(image_id=0).plot()
 # ensemble_detections_ds.confidence.sel(model=0).plot()
-for m in range(5):
+for m in range(ensemble_detections_ds.model.size):
     plt.figure()
     ensemble_detections_ds.confidence.sel(model=m).plot()
 
@@ -260,6 +260,10 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
     # ---- method kwargs ----
 )
 
+# %%
+from ethology.validators.detections import ValidBboxDetectionsDataset
+ValidBboxDetectionsDataset(fused_detections_ds)
+
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Fuse detections across models with NMS
 
@@ -277,7 +281,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 # fused_detections_ds = fused_detections_nms_ds
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Remove low confidence detections
-confidence_threshold_post_fusion = 0.5
+confidence_threshold_post_fusion = 0.4
 fused_detections_ds_ = fused_detections_ds.where(
     fused_detections_ds.confidence >= confidence_threshold_post_fusion
 )

From b38080dbba6fa25b7d29bd4a83691a18f76526ac Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 3 Dec 2025 12:56:33 +0000
Subject: [PATCH 22/39] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/detectors/ensembles/fusion.py | 10 +++++-----
 examples/ensemble_of_detectors.py      |  1 +
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 3af74e52..1bd08ed1 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -145,10 +145,10 @@ def fuse_detections(
 
     # Postprocess data arrays
     fused_data_arrays = _postprocess_multi_image_fused_arrays(
-        position = centroid_fused_da,
-        shape = shape_fused_da,
-        confidence = confidence_fused_da,
-        label = label_fused_da,
+        position=centroid_fused_da,
+        shape=shape_fused_da,
+        confidence=confidence_fused_da,
+        label=label_fused_da,
     )
 
     # Return a dataset
@@ -413,7 +413,7 @@ def _postprocess_multi_image_fused_arrays(
     # Pad labels with -1 rather than nan
     label_da = label_da.fillna(-1).astype(int)
 
-    # Assign id coordinates to data arrays 
+    # Assign id coordinates to data arrays
     # (these are lost after apply_ufunc because exclude_dims is used)
     n_max_detections = position_da.sizes["id"]
     id_coords = np.arange(n_max_detections)
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
index e815b26b..d8ebb51c 100644
--- a/examples/ensemble_of_detectors.py
+++ b/examples/ensemble_of_detectors.py
@@ -262,6 +262,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
 
 # %%
 from ethology.validators.detections import ValidBboxDetectionsDataset
+
 ValidBboxDetectionsDataset(fused_detections_ds)
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

From b88a412fbf7b2c814480aad2703ba5b3021c7da0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Wed, 3 Dec 2025 12:56:34 +0000
Subject: [PATCH 23/39] Add ensembles yaml

---
 examples/ensemble_of_detectors.yaml | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)
 create mode 100644 examples/ensemble_of_detectors.yaml

diff --git a/examples/ensemble_of_detectors.yaml b/examples/ensemble_of_detectors.yaml
new file mode 100644
index 00000000..80de260b
--- /dev/null
+++ b/examples/ensemble_of_detectors.yaml
@@ -0,0 +1,19 @@
+models:
+  model_class: fasterrcnn_resnet50_fpn_v2
+  model_kwargs:
+    num_classes: 2
+    weights: null
+    weights_backbone: null
+  checkpoints:
+  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt
+  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt
+  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/75583ec227e3444ab692b99c64795325/checkpoints/last.ckpt
+  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/4acc37206b1e4f679d535c837bee2c2f/checkpoints/last.ckpt
+  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/fdcf88fcbcc84fbeb94b45ca6b6f8914/checkpoints/last.ckpt
+  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/daa05ded0ea047388c9134bf044061c5/checkpoints/last.ckpt
+fusion:
+  method: weighted_boxes_fusion
+  method_kwargs:
+    iou_thr: 0.5
+    skip_box_thr: 0.0001
+  max_n_detections: 300

From 1b9476a0d2c1353c269963ff23f0b8844c671981 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 09:39:59 +0000
Subject: [PATCH 24/39] Stop tracking evaluation

---
 ethology/detectors/evaluate.py      | 245 -------------------
 examples/ensemble_of_detectors.py   | 353 ----------------------------
 examples/ensemble_of_detectors.yaml |  19 --
 3 files changed, 617 deletions(-)
 delete mode 100644 ethology/detectors/evaluate.py
 delete mode 100644 examples/ensemble_of_detectors.py
 delete mode 100644 examples/ensemble_of_detectors.yaml

diff --git a/ethology/detectors/evaluate.py b/ethology/detectors/evaluate.py
deleted file mode 100644
index 1ae34104..00000000
--- a/ethology/detectors/evaluate.py
+++ /dev/null
@@ -1,245 +0,0 @@
-"""Utilities for evaluating detectors."""
-
-import numpy as np
-import torch
-import torchvision.ops as ops
-import xarray as xr
-from scipy.optimize import linear_sum_assignment
-
-
-def evaluate_detections_hungarian_ds(
-    pred_bboxes_ds: xr.Dataset,
-    gt_bboxes_ds: xr.Dataset,
-    iou_threshold: float,
-) -> tuple[xr.Dataset, xr.Dataset]:
-    """Compute true positives, false positives, and missed detections.
-
-    Uses Hungarian algorithm for matching.
-    """
-    # Add xy_min and xy_max if not present
-    if all(
-        [
-            var_str not in pred_bboxes_ds.variables
-            for var_str in ["xy_min", "xy_max"]
-        ]
-    ):
-        pred_bboxes_ds = _add_bboxes_min_max_corners(pred_bboxes_ds)
-
-    if all(
-        [
-            var_str not in gt_bboxes_ds.variables
-            for var_str in ["xy_min", "xy_max"]
-        ]
-    ):
-        gt_bboxes_ds = _add_bboxes_min_max_corners(gt_bboxes_ds)
-
-    # Prepare input for hungarian
-    pred_bboxes_x1y1_x2y2 = xr.concat(
-        [pred_bboxes_ds.xy_min, pred_bboxes_ds.xy_max], dim="space"
-    ).transpose("image_id", "id", "space")
-
-    gt_bboxes_x1y1_x2y2 = xr.concat(
-        [gt_bboxes_ds.xy_min, gt_bboxes_ds.xy_max], dim="space"
-    ).transpose("image_id", "id", "space")
-
-    # rename id dimension in gt_bboxes_x1y1_x2y2
-    gt_bboxes_x1y1_x2y2 = gt_bboxes_x1y1_x2y2.rename({"id": "id_gt"})
-
-    # Run hungarian vectorized
-    tp_array, fp_array, md_array, iou_tp_array = xr.apply_ufunc(
-        _evaluate_detections_hungarian_arrays,
-        pred_bboxes_x1y1_x2y2,
-        gt_bboxes_x1y1_x2y2,
-        kwargs={"iou_threshold": iou_threshold},
-        input_core_dims=[
-            ["id", "space"],
-            ["id_gt", "space"],
-        ],
-        output_core_dims=[
-            ["id"],
-            ["id"],
-            ["id_gt"],
-            ["id"],
-        ],
-        vectorize=True,
-        exclude_dims={"id", "id_gt"},
-    )
-
-    # Add to datasets
-    pred_bboxes_ds["tp"] = xr.DataArray(tp_array, dims=["image_id", "id"])
-    pred_bboxes_ds["fp"] = xr.DataArray(fp_array, dims=["image_id", "id"])
-    pred_bboxes_ds["iou_tp"] = xr.DataArray(
-        iou_tp_array, dims=["image_id", "id"]
-    )
-
-    # rename id dimension in md_array
-    md_array = md_array.rename({"id_gt": "id"})
-    gt_bboxes_ds["md"] = xr.DataArray(md_array, dims=["image_id", "id"])
-
-    return pred_bboxes_ds, gt_bboxes_ds
-
-
-def _evaluate_detections_hungarian_arrays(
-    pred_bboxes: np.ndarray, gt_bboxes: np.ndarray, iou_threshold: float
-) -> tuple[np.ndarray, ...]:
-    """Compute true positives, false positives, and missed detections.
-
-    Uses Hungarian algorithm for matching and takes arrays of bboxes as input
-    in x1y1x2y2 format.
-
-    Parameters
-    ----------
-    pred_bboxes : np.ndarray
-        An array of prediction bounding boxes with the first four columns being
-        the coordinates of the bounding box in the format [x1, y1, x2, y2]
-    gt_bboxes : np.ndarray
-        An array of ground truth bounding boxes with the first four columns
-        being the coordinates of the bounding box in the format
-        [x1, y1, x2, y2]
-    iou_threshold : float
-        IoU threshold for considering a detection as true positive
-
-    Returns
-    -------
-    tuple
-        A tuple of four boolean arrays:
-        - true_positives: True for each predicted bbox that is a true positive
-        - false_positives: True for each predicted bbox that is a false
-        positive
-        - missed_detections: True for each ground truth bbox that is missed
-        - true_positives_iou: IoU of each true positive
-
-    Notes
-    -----
-    The output arrays are padded with False to the length of the original
-    arrays. This means that for example where the true_positives array is
-    False, that does not necessarily mean that the prediction is a false
-    positive. The same applies for the true_positives_iou array, which is
-    padded with nan.
-
-    """
-    # Remove nan values
-    n_pred_bboxes_padded = pred_bboxes.shape[0]
-    n_gt_bboxes_padded = gt_bboxes.shape[0]
-    pred_bboxes = pred_bboxes[~np.isnan(pred_bboxes).any(axis=1), :]
-    gt_bboxes = gt_bboxes[~np.isnan(gt_bboxes).any(axis=1), :]
-
-    # Initialize output arrays
-    true_positives = np.zeros(len(pred_bboxes), dtype=bool)
-    false_positives = np.zeros(len(pred_bboxes), dtype=bool)
-    matched_gts = np.zeros(len(gt_bboxes), dtype=bool)
-    missed_detections = np.zeros(len(gt_bboxes), dtype=bool)  # unmatched gts
-
-    true_positives_iou = np.zeros(len(pred_bboxes), dtype=float)
-
-    # cast as a tensor if not already
-    if not isinstance(pred_bboxes, torch.Tensor):
-        pred_bboxes = torch.from_numpy(pred_bboxes).float()
-    if not isinstance(gt_bboxes, torch.Tensor):
-        gt_bboxes = torch.from_numpy(gt_bboxes).float()
-
-    if len(pred_bboxes) > 0 and len(gt_bboxes) > 0:
-        # Compute IoU matrix (pred_bboxes x gt_bboxes)
-        iou_matrix = ops.box_iou(pred_bboxes[:, :4], gt_bboxes).cpu().numpy()
-        # iou_matrix[np.isnan(iou_matrix)] = -np.inf
-
-        # Use Hungarian algorithm to find optimal assignment
-        pred_indices, gt_indices = linear_sum_assignment(
-            iou_matrix, maximize=True
-        )
-
-        # Mark true positives and false positives based on optimal assignment
-        for pred_idx, gt_idx in zip(pred_indices, gt_indices, strict=True):
-            if iou_matrix[pred_idx, gt_idx] > iou_threshold:
-                true_positives[pred_idx] = True
-                matched_gts[gt_idx] = True
-                true_positives_iou[pred_idx] = iou_matrix[pred_idx, gt_idx]
-            else:
-                false_positives[pred_idx] = True
-
-        # Mark unmatched predictions as false positives
-        false_positives[~true_positives] = True
-
-        # Mark unmatched ground truth as missed detections
-        missed_detections[~matched_gts] = True
-
-    elif len(pred_bboxes) == 0 and len(gt_bboxes) > 0:
-        # No predictions, all ground truth are missed
-        missed_detections[:] = True
-    elif len(pred_bboxes) > 0 and len(gt_bboxes) == 0:
-        # No ground truth, all predictions are false positives
-        false_positives[:] = True
-
-    # Pad tp, fp for pred_bboxes with False
-    tp_fp_pred_bboxes_padded: tuple[np.ndarray, ...] = ()
-    for output in [true_positives, false_positives]:
-        output_padded = np.pad(
-            output,
-            (0, n_pred_bboxes_padded - len(output)),
-            mode="constant",
-            constant_values=False,
-        )
-        tp_fp_pred_bboxes_padded += (output_padded,)
-
-    # Pad true_positives_iou for pred_bboxes with nan
-    true_positives_iou_padded = np.pad(
-        true_positives_iou,
-        (0, n_pred_bboxes_padded - len(true_positives_iou)),
-        mode="constant",
-        constant_values=np.nan,
-    )
-
-    # Pad results for gt_bboxes with False
-    missed_detections_padded = np.pad(
-        missed_detections,
-        (0, n_gt_bboxes_padded - len(missed_detections)),
-        mode="constant",
-        constant_values=False,
-    )
-    return tp_fp_pred_bboxes_padded + (
-        missed_detections_padded,
-        true_positives_iou_padded,
-    )
-
-
-def compute_precision_recall_ds(
-    pred_bboxes_ds: xr.Dataset,
-    gt_bboxes_ds: xr.Dataset,
-    iou_threshold: float,
-) -> tuple[xr.Dataset, xr.Dataset]:
-    """Compute precision and recall per image."""
-    # Compute true positives, false positives, and missed detections
-    pred_bboxes_ds, gt_bboxes_ds = evaluate_detections_hungarian_ds(
-        pred_bboxes_ds=pred_bboxes_ds,
-        gt_bboxes_ds=gt_bboxes_ds,
-        iou_threshold=iou_threshold,
-    )
-
-    # Compute precision and recall per image
-    precision_per_img = pred_bboxes_ds.tp.sum(dim="id") / (
-        pred_bboxes_ds.tp.sum(dim="id") + pred_bboxes_ds.fp.sum(dim="id")
-    )
-    recall_per_img = pred_bboxes_ds.tp.sum(dim="id") / (
-        pred_bboxes_ds.tp.sum(dim="id") + gt_bboxes_ds.md.sum(dim="id")
-    )
-
-    # Add to datasets
-    pred_bboxes_ds["precision"] = precision_per_img
-    pred_bboxes_ds["recall"] = recall_per_img
-
-    return pred_bboxes_ds, gt_bboxes_ds
-
-
-def _add_bboxes_min_max_corners(ds):
-    """Add xy_min and xy_max arrays to ds.
-
-    # Compare to torchvision.ops.box_convert in testing?
-    box_convert(
-        torch.from_numpy(np.c_[ds.position.T, ds.shape.T]),
-        in_fmt="cxcywh",
-        out_fmt="xyxy",
-    )
-    """
-    ds["xy_min"] = ds.position - 0.5 * ds.shape
-    ds["xy_max"] = ds.position + 0.5 * ds.shape
-    return ds
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
deleted file mode 100644
index d8ebb51c..00000000
--- a/examples/ensemble_of_detectors.py
+++ /dev/null
@@ -1,353 +0,0 @@
-"""Evaluating ensemble of trained detectors."""
-# %%
-# imports
-
-from pathlib import Path
-
-import numpy as np
-import torch
-import torchvision.transforms.v2 as transforms
-import xarray as xr
-import yaml
-from lightning import Trainer
-from matplotlib import pyplot as plt
-from torch.utils.data import DataLoader
-from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
-
-from ethology.detectors.ensembles.fusion import fuse_detections
-from ethology.detectors.ensembles.models import EnsembleDetector
-from ethology.detectors.evaluate import compute_precision_recall_ds
-from ethology.io.annotations import load_bboxes
-
-# %%
-# %matplotlib widget
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-# Helper functions
-def create_coco_dataset(
-    images_dir: str | Path,
-    annotations_file: str | Path,
-    composed_transform: transforms.Compose,
-) -> CocoDetection:
-    """Create a COCO dataset for object detection.
-
-    Note: transforms are applied to the full dataset. If the dataset
-    is later split, all splits will have the same transforms.
-    """
-    dataset_coco = CocoDetection(
-        root=images_dir,
-        annFile=annotations_file,
-        transforms=composed_transform,
-    )
-
-    # wrap dataset for transforms v2
-    dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco)
-
-    return dataset_transformed
-
-
-def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
-    """Collate function for dataloader with varying number of bounding boxes.
-
-    A custom function is needed for detection
-    because the number of bounding boxes varies
-    between images of the same batch.
-    See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop
-
-    Parameters
-    ----------
-    batch : tuple
-        a tuple of 2 tuples, the first one holding all images in the batch,
-        and the second one holding the corresponding annotations.
-
-    Returns
-    -------
-    tuple
-        a tuple of length = batch size, made up of (image, annotations)
-        tuples.
-
-    """
-    return tuple(zip(*batch, strict=True))
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Input data
-
-dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full")
-images_dir = dataset_dir / "frames"
-annotations_dir = dataset_dir / "annotations"
-annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json"
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Define a dataloader
-# Define transforms for inference
-inference_transforms = transforms.Compose(
-    [
-        transforms.ToImage(),
-        transforms.ToDtype(torch.float32, scale=True),
-    ]
-)
-
-# Create COCO dataset
-# TODO: convert from ethology detections dataset to COCO dataset
-# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
-dataset_coco = create_coco_dataset(
-    images_dir=Path(dataset_dir) / "frames",
-    annotations_file=annotations_file_path,
-    composed_transform=inference_transforms,
-)
-
-# dataloader
-dataloader = DataLoader(
-    dataset_coco,
-    batch_size=12,
-    shuffle=False,
-    num_workers=4,
-    collate_fn=collate_fn_varying_n_bboxes,
-    persistent_workers=True,
-    # multiprocessing_context="fork"
-    # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available()
-    # else None,  # see https://github.com/pytorch/pytorch/issues/87688
-)
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Define a YAML config file for the ensemble of trained detectors
-experiment_ID = "617393114420881798"
-ml_runs_experiment_dir = (
-    Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID
-)
-last_ckpt = Path("checkpoints") / "last.ckpt"
-
-config = {
-    "models": {
-        "model_class": "fasterrcnn_resnet50_fpn_v2",
-        # imported from torchvision.models.detection
-        "model_kwargs": {
-            "num_classes": 2,
-            "weights": None,  # null in YAML becomes None in Python
-            "weights_backbone": None,
-        },
-        "checkpoints": [
-            str(
-                ml_runs_experiment_dir
-                / "f348d9d196934073bece1b877cbc4d38"
-                / last_ckpt
-            ),  # above_0th
-            str(
-                ml_runs_experiment_dir
-                / "879d2f77e2b24adcb06b87d2fede6a04"
-                / last_ckpt
-            ),  # above_1st
-            str(
-                ml_runs_experiment_dir
-                / "75583ec227e3444ab692b99c64795325"
-                / last_ckpt
-            ),  # above_5th
-            str(
-                ml_runs_experiment_dir
-                / "4acc37206b1e4f679d535c837bee2c2f"
-                / last_ckpt
-            ),  # above_10th
-            str(
-                ml_runs_experiment_dir
-                / "fdcf88fcbcc84fbeb94b45ca6b6f8914"
-                / last_ckpt
-            ),  # above_25th
-            str(
-                ml_runs_experiment_dir
-                / "daa05ded0ea047388c9134bf044061c5"
-                / last_ckpt
-            ),  # above_50th
-        ],
-    },
-    "fusion": {
-        "method": "weighted_boxes_fusion",
-        # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted"
-        "method_kwargs": {
-            # arguments as in ensemble_boxes.weighted_boxes_fusion
-            "iou_thr": 0.5,  # iou threshold for the ensemble
-            "skip_box_thr": 0.0001,
-        },
-        # "n_jobs": -1,  # workers for joblib.Parallel,
-        # n_workers should be <= number of CPU cores
-        # "confidence_threshold_post_fusion": 0.0,
-        "max_n_detections": 300,
-    },
-}
-config_file = "ensemble_of_detectors.yaml"
-with open(config_file, "w") as f:
-    yaml.dump(config, f, sort_keys=False)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Load the ensemble of detectors
-ensemble_detector = EnsembleDetector(config_file)
-print(f"Ensemble detector is on device: {ensemble_detector.device}")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Run the ensemble of detectors on a dataset
-# Use Trainer for inference (this sets the device flexibly)
-trainer = Trainer(accelerator="gpu", devices=1, logger=False)
-_ = trainer.predict(ensemble_detector, dataloader)
-
-
-# Format predictions as ethology detections dataset and add attrs
-# TODO: think about syntax of format_predictions (should it be instance or
-# static method instead?)
-# Q: Can it just be output from .predict?
-# TODO: dataloader to ethology detections dataset
-gt_bboxes_ds = load_bboxes.from_files(
-    annotations_file_path, format="COCO", images_dirs=images_dir
-)
-ensemble_detections_ds = ensemble_detector.format_predictions(
-    attrs=gt_bboxes_ds.attrs
-)
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Some nice plots:
-# ensemble_detections_ds.confidence.sel(image_id=0).plot()
-# ensemble_detections_ds.confidence.sel(model=0).plot()
-for m in range(ensemble_detections_ds.model.size):
-    plt.figure()
-    ensemble_detections_ds.confidence.sel(model=m).plot()
-
-
-# %%%%%%%%
-# All models predict less boxes and have less avg confidence per image in
-# image_ids from 350 to 450. Let's inspect video names and images for these
-# samples.
-
-# Add video name array
-video_name = [
-    ensemble_detections_ds.map_image_id_to_filename[img_id].split("_frame")[0]
-    for img_id in ensemble_detections_ds.image_id.values
-]
-ensemble_detections_ds["video"] = xr.DataArray(video_name, dims="image_id")
-
-# which videos?
-np.unique(ensemble_detections_ds.video.sel(image_id=range(350, 450)).values)
-
-# %%%%%%
-# Visualise image
-for image_id in range(350, 450, 10):
-    image_filename = ensemble_detections_ds.map_image_id_to_filename[image_id]
-    image_path = ensemble_detections_ds.images_directories / image_filename
-
-    # img = Image.open(image_path)
-    img = plt.imread(image_path)
-
-    plt.figure()
-    plt.imshow(img)
-    plt.title(f"{image_filename}")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Fuse detections across models with WBF
-# TODO: think whether joblib approach is more readable?
-image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
-ensemble_detections_ds.attrs["image_shape"] = image_width_height
-config_fusion: dict = config["fusion"]
-
-
-fused_detections_ds = fuse_detections(
-    ensemble_detections_ds,
-    fusion_method=config_fusion["method"],
-    fusion_method_kwargs=config_fusion["method_kwargs"],
-    # max_n_detections=config_fusion["max_n_detections"],
-    # should be larger than expected maximum number of detections after fusion
-    # ---- method kwargs ----
-)
-
-# %%
-from ethology.validators.detections import ValidBboxDetectionsDataset
-
-ValidBboxDetectionsDataset(fused_detections_ds)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Fuse detections across models with NMS
-
-# fused_detections_nms_ds = fuse_ensemble_detections(
-#     ensemble_detections_ds,
-#     fusion_method="soft_nms",
-#     fusion_method_kwargs={
-#         "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
-#         "sigma": 0.5,
-#         "thresh": 0.001,
-#     },
-#     max_n_detections=500,
-# )
-
-# fused_detections_ds = fused_detections_nms_ds
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Remove low confidence detections
-confidence_threshold_post_fusion = 0.4
-fused_detections_ds_ = fused_detections_ds.where(
-    fused_detections_ds.confidence >= confidence_threshold_post_fusion
-)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Evaluate the ensemble model
-# - load ground truth
-# - compute metrics
-
-# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
-
-iou_threshold_tp = 0.25
-fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds(
-    pred_bboxes_ds=fused_detections_ds_,
-    gt_bboxes_ds=gt_bboxes_ds,
-    iou_threshold=iou_threshold_tp,
-)
-
-# All models on full August dataset, without removing low
-# confidence detections:
-# confidence_threshold_post_fusion = 0.0
-# Precision: 0.5920
-# Recall: 0.8455
-# ---
-# confidence_threshold_post_fusion = 0.4
-# Precision: 0.8339
-# Recall: 0.7177
-# ---
-# confidence_threshold_post_fusion = 0.5
-# Precision: 0.8714
-# Recall: 0.6624
-# ---
-
-print(
-    "Ensemble model with confidence threshold post fusion: "
-    f"{confidence_threshold_post_fusion:.2f}"
-)
-print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}")
-print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Plot calibration curve
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Evaluate single models
-list_detections_ds_eval = []
-for k in range(ensemble_detections_ds.sizes["model"]):
-    # filter low confidence detections (for a fairer comparison)
-    detections_one_model = ensemble_detections_ds.where(
-        ensemble_detections_ds.confidence >= confidence_threshold_post_fusion
-    ).sel(model=k)
-
-    # evaluate
-    detections_ds, _ = compute_precision_recall_ds(
-        pred_bboxes_ds=detections_one_model,
-        gt_bboxes_ds=gt_bboxes_ds,
-        iou_threshold=iou_threshold_tp,
-    )
-    list_detections_ds_eval.append(detections_ds)
-
-    print(f"Model: {k}")
-    print(f"Precision: {detections_ds.precision.mean().values:.4f}")
-    print(f"Recall: {detections_ds.recall.mean().values:.4f}")
-    print("--------------------------------")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Visualise detections
diff --git a/examples/ensemble_of_detectors.yaml b/examples/ensemble_of_detectors.yaml
deleted file mode 100644
index 80de260b..00000000
--- a/examples/ensemble_of_detectors.yaml
+++ /dev/null
@@ -1,19 +0,0 @@
-models:
-  model_class: fasterrcnn_resnet50_fpn_v2
-  model_kwargs:
-    num_classes: 2
-    weights: null
-    weights_backbone: null
-  checkpoints:
-  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt
-  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt
-  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/75583ec227e3444ab692b99c64795325/checkpoints/last.ckpt
-  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/4acc37206b1e4f679d535c837bee2c2f/checkpoints/last.ckpt
-  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/fdcf88fcbcc84fbeb94b45ca6b6f8914/checkpoints/last.ckpt
-  - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/daa05ded0ea047388c9134bf044061c5/checkpoints/last.ckpt
-fusion:
-  method: weighted_boxes_fusion
-  method_kwargs:
-    iou_thr: 0.5
-    skip_box_thr: 0.0001
-  max_n_detections: 300

From 397c1a7a492b9b8c948301e3dfba0fe1225b5a82 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 11:36:58 +0000
Subject: [PATCH 25/39] Accelerate fusion using joblib

---
 ethology/detectors/ensembles/fusion.py | 195 ++++++++++++++-----------
 1 file changed, 108 insertions(+), 87 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 1bd08ed1..95bf99cd 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -2,11 +2,14 @@
 
 from collections.abc import Callable
 from functools import partial
-from typing import Literal, TypedDict, Unpack
+from typing import Literal, TypeAlias, TypedDict, Unpack
 
 import ensemble_boxes
 import numpy as np
+import pandas as pd
 import xarray as xr
+from joblib import Parallel, delayed
+from tqdm import tqdm
 
 from ethology.validators.detections import (
     ValidBboxDetectionsDataset,
@@ -14,6 +17,8 @@
 )
 from ethology.validators.utils import _check_input, _check_output
 
+# ------------------- Supported fusion methods ------------------
+# from ensemble_boxes
 VALID_FUSION_METHODS = {
     "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion,
     "nms": ensemble_boxes.nms,
@@ -21,13 +26,25 @@
     "non_maxium_weighted": ensemble_boxes.non_maximum_weighted,
 }
 
-fusion_method_type = Literal[
-    "weighted_boxes_fusion", "nms", "soft_nms", "non_maxium_weighted"
+
+#  ------------------ Custom types  ----------------------
+TypeFusionMethod = Literal[
+    "weighted_boxes_fusion",
+    "nms",
+    "soft_nms",
+    "non_maxium_weighted",
+]
+
+TupleFourDataArrays: TypeAlias = tuple[
+    xr.DataArray,
+    xr.DataArray,
+    xr.DataArray,
+    xr.DataArray,
 ]
 
 
-class _TypeFusionKwargs(TypedDict, total=False):
-    """Type hints for fusion method kwargs.
+class _TypeFusionMethodKwargs(TypedDict, total=False):
+    """Type hints for fusion method keyword arguments.
 
     Parameters for methods as described in the ensemble_boxes documentation.
     See https://github.com/ZFTurbo/Weighted-Boxes-Fusion
@@ -38,21 +55,20 @@ class _TypeFusionKwargs(TypedDict, total=False):
     weights: list[float]
         Weights for each model.
     iou_thr: float
-        IoU threshold for detections to be considered a true positive
         IoU threshold for detections to be considered a true positive
         during fusion.
     skip_box_thr: float
-        Exclude from fusion boxes with confidence below this value.
+        Exclude boxes with confidence below this value from fusion.
     sigma: float
-        Sigma for soft NMS.
+        Sigma for soft non-maximum suppression.
     thresh: float
-        Threshold for boxes to keep after soft NMS.
+        Threshold for boxes to keep after soft non-maximum suppression.
     conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"]
         Method to compute the confidence score of the fused detections.
 
         - "avg": Average confidence score of the fused detections (default).
-        - 'box_and_model_avg': box and model wise hybrid weighted average.
-        - 'absent_model_aware_avg': weighted average that takes into account
+        - "box_and_model_avg": box and model wise hybrid weighted average.
+        - "absent_model_aware_avg": weighted average that takes into account
           the absent model.
     allows_overflow: bool
         Whether to allow the confidence score of the fused detections to
@@ -69,13 +85,17 @@ class _TypeFusionKwargs(TypedDict, total=False):
     allows_overflow: bool
 
 
+# ----------------------------------
+
+
 @_check_input(ValidBboxDetectionsEnsembleDataset)
 @_check_output(ValidBboxDetectionsDataset)
 def fuse_detections(
     ensemble_detections_ds: xr.Dataset,
-    fusion_method: fusion_method_type,
+    fusion_method: TypeFusionMethod,
     fusion_method_kwargs: dict | None = None,
     max_n_detections: int | None = None,
+    n_workers: int | None = -1,  # number of workers for joblib.Parallel
 ) -> xr.Dataset:
     """Fuse ensemble detections across models using WBF.
 
@@ -109,46 +129,26 @@ def fuse_detections(
         _fuse_single_image_detections, fusion_function
     )
 
-    # Run fusion across image_id using apply_ufunc
-    centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = (
-        xr.apply_ufunc(
-            _fuse_single_image_detections_partial,
-            ensemble_detections_ds.position,  # .data array is passed
-            ensemble_detections_ds.shape,
-            ensemble_detections_ds.confidence,
-            ensemble_detections_ds.label,
-            kwargs={
-                "image_width_height": image_width_height,
-                "max_n_detections": max_n_detections,
-                **(fusion_method_kwargs if fusion_method_kwargs else {}),
-            },
-            input_core_dims=[  # do not broadcast across these
-                ["space", "id", "model"],  # centroid
-                ["space", "id", "model"],  # shape
-                ["id", "model"],  # confidence
-                ["id", "model"],  # label
-            ],
-            output_core_dims=[  # do not broadcast across these
-                ["space", "id"],  # centroid
-                ["space", "id"],  # shape
-                ["id"],  # confidence
-                ["id"],  # label
-            ],
-            vectorize=True,
-            # TODO: can I avoid vectorize?
-            # loop over non-core dims (i.e. image_id);
-            # assumes function only takes arrays over core dims as input
-            exclude_dims={"id"},
-            # to allow dimensions that change size between input and output
+    # Run fusion across image_id
+    # if n_workers is None:
+    #     n_workers = -1
+
+    results_per_img_id = Parallel(n_jobs=n_workers)(
+        delayed(_fuse_single_image_detections_partial)(
+            ensemble_detections_ds.position.sel(image_id=img_id).values,
+            ensemble_detections_ds.shape.sel(image_id=img_id).values,
+            ensemble_detections_ds.confidence.sel(image_id=img_id).values,
+            ensemble_detections_ds.label.sel(image_id=img_id).values,
+            image_width_height,
+            max_n_detections,
+            **fusion_method_kwargs,
         )
+        for img_id in tqdm(ensemble_detections_ds.image_id)
     )
 
     # Postprocess data arrays
     fused_data_arrays = _postprocess_multi_image_fused_arrays(
-        position=centroid_fused_da,
-        shape=shape_fused_da,
-        confidence=confidence_fused_da,
-        label=label_fused_da,
+        results_per_img_id, ensemble_detections_ds.image_id
     )
 
     # Return a dataset
@@ -209,15 +209,16 @@ def _preprocess_single_image_detections(
     label: xr.DataArray,
     image_width_height: np.ndarray,
 ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]:
-    """Prepare ensemble detections on a single image for fusion."""
-    # Prepare boxes array --> position, shape arrays to x1y1x2y normalised
+    """Prepare detections of an ensemble on a single image for fusion."""
+    # Prepare boxes array
+    # transform position and shape arrays to x1y1x2y2 normalised
     bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None]
     bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None]
     bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2])
-    # 4, n_annot, n_models
+    # shape: 4, max_n_annotations_per_frame, n_models
 
     # Get list of bboxes per model
-    # arrays need to be tall for WBF
+    # arrays need to be tall for fusion methods
     n_models = bboxes_x1y1_x2y2_normalised.shape[-1]
     list_bboxes_per_model = [
         arr.squeeze()
@@ -308,7 +309,7 @@ def _postprocess_single_image_detections(
 
     # Format output as xarray dataarrays
     centroid_da, shape_da, confidence_da, label_da = (
-        _single_image_detections_as_dataarrays(
+        _parse_single_image_detections_as_dataarrays(
             ensemble_data[:, 0:4],
             ensemble_data[:, 4],
             ensemble_data[:, 5],
@@ -320,15 +321,15 @@ def _postprocess_single_image_detections(
 
 def _fuse_single_image_detections(
     fusion_function: Callable,
-    position,
-    shape,
+    position: np.ndarray,
+    shape: np.ndarray,
     confidence: np.ndarray,
     label: np.ndarray,
     image_width_height: np.ndarray,
     max_n_detections: int,
-    **fusion_kwargs: Unpack[_TypeFusionKwargs],  #  method-only kwargs
-) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Fuse detections across models for a single image using WBF."""
+    **fusion_kwargs: Unpack[_TypeFusionMethodKwargs],  #  method-only kwargs
+) -> TupleFourDataArrays:
+    """Fuse detections across models for a single image using selected method."""
     # Prepare single image arrays for fusion
     list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
         _preprocess_single_image_detections(
@@ -336,8 +337,7 @@ def _fuse_single_image_detections(
         )
     )
 
-    # ------------------------------------
-    # Run WBF on one image
+    # Run fusion method on one image
     ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
         fusion_function(
             list_bboxes_per_model,
@@ -347,8 +347,6 @@ def _fuse_single_image_detections(
         )
     )
 
-    # ------------------------------------
-
     # Format output as xarray dataarrays
     centroid_da, shape_da, confidence_da, label_da = (
         _postprocess_single_image_detections(
@@ -363,13 +361,13 @@ def _fuse_single_image_detections(
     return centroid_da, shape_da, confidence_da, label_da
 
 
-def _single_image_detections_as_dataarrays(
+def _parse_single_image_detections_as_dataarrays(
     x1y1_x2y2_array: np.ndarray,
     scores_array: np.ndarray,
     labels_array: np.ndarray,
     id_array: np.ndarray | None = None,
-) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]:
-    """Format single image fused detections as data arrays."""
+) -> TupleFourDataArrays:
+    """Format array of single image fused results as data arrays."""
     if id_array is None:
         n_detections = x1y1_x2y2_array.shape[0]
         id_array = np.arange(n_detections)
@@ -396,30 +394,53 @@ def _single_image_detections_as_dataarrays(
     )
 
 
+@_check_output(ValidBboxDetectionsDataset)
 def _postprocess_multi_image_fused_arrays(
-    position: xr.DataArray,
-    shape: xr.DataArray,
-    confidence: xr.DataArray,
-    label: xr.DataArray,
-) -> dict:
-    """Postprocess fused data arrays on multiple images after fusion."""
-    data_arrays = [position, shape, confidence, label]
+    results_per_img_id: list[TupleFourDataArrays],
+    list_img_id: list,
+) -> xr.Dataset:
+    """Postprocess fused data arrays on multiple images after fusion.
 
-    # Remove extra padding across annotations
-    position_da, shape_da, confidence_da, label_da = [
-        da.dropna(dim="id", how="all") for da in data_arrays
-    ]
+    Fix padding and assign id coordinates.
+    """
+    # Parse results from joblib
+    # (output from joblib is a list of n = n_images, each element
+    # containing a tuple of data arrays)
+    list_da_dict = {}
+    (
+        list_da_dict["position"],
+        list_da_dict["shape"],
+        list_da_dict["confidence"],
+        list_da_dict["label"],
+    ) = list(zip(*results_per_img_id))
+
+    # Concatenate lists of dataarrays along image_id dimension
+    fused_da_dict = {}
+    for da_str, list_da in list_da_dict.items():
+        fused_da_dict[da_str] = xr.concat(
+            list_da,
+            pd.Index(list_img_id, name="image_id"),
+        )
+
+    # Remove extra padding in id dimension
+    fixed_padding_da_dict = {}
+    for da_str, da in fused_da_dict.items():
+        fixed_padding_da_dict[da_str] = da.dropna(dim="id", how="all")
 
     # Pad labels with -1 rather than nan
-    label_da = label_da.fillna(-1).astype(int)
-
-    # Assign id coordinates to data arrays
-    # (these are lost after apply_ufunc because exclude_dims is used)
-    n_max_detections = position_da.sizes["id"]
-    id_coords = np.arange(n_max_detections)
-    return {
-        "position": position_da.assign_coords(id=id_coords),
-        "shape": shape_da.assign_coords(id=id_coords),
-        "confidence": confidence_da.assign_coords(id=id_coords),
-        "label": label_da.assign_coords(id=id_coords),
-    }
+    fixed_padding_da_dict["label"] = (
+        fixed_padding_da_dict["label"].fillna(-1).astype(int)
+    )
+
+    # Format as dataset
+    return xr.Dataset(data_vars=fixed_padding_da_dict)
+    # # Assign id coordinates to data arrays
+    # # (these are lost after apply_ufunc because exclude_dims is used)
+    # n_max_detections = fixed_padding_da_dict["position"].sizes["id"]
+    # id_coords = np.arange(n_max_detections)
+
+    # fixed_id_coord_da_dict = {}
+    # for da_str, da in fixed_padding_da_dict.items():
+    #     fixed_id_coord_da_dict[da_str] = da.assign_coords(id=id_coords)
+
+    # return fixed_padding_da_dict

From a515f1f7c6b8f2ea66271a870eb2441d9e6a9ff5 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 13:05:41 +0000
Subject: [PATCH 26/39] Simplify fusion module

---
 ethology/detectors/ensembles/fusion.py | 339 ++++++++++---------------
 1 file changed, 138 insertions(+), 201 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 95bf99cd..948ae776 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -48,32 +48,6 @@ class _TypeFusionMethodKwargs(TypedDict, total=False):
 
     Parameters for methods as described in the ensemble_boxes documentation.
     See https://github.com/ZFTurbo/Weighted-Boxes-Fusion
-
-
-    Parameters
-    ----------
-    weights: list[float]
-        Weights for each model.
-    iou_thr: float
-        IoU threshold for detections to be considered a true positive
-        during fusion.
-    skip_box_thr: float
-        Exclude boxes with confidence below this value from fusion.
-    sigma: float
-        Sigma for soft non-maximum supression.
-    thresh: float
-        Threshold for boxes to keep after soft non-maximum supression.
-    conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"]
-        Method to compute the confidence score of the fused detections.
-
-        - "avg": Average confidence score of the fused detections (default).
-        - "box_and_model_avg": box and model wise hybrid weighted average.
-        - "absent_model_aware_avg": weighted average that takes into account
-          the absent model.
-    allows_overflow: bool
-        Whether to allow the confidence score of the fused detections to
-        exceed 1.
-
     """
 
     weights: list[float] | None
@@ -95,12 +69,12 @@ def fuse_detections(
     fusion_method: TypeFusionMethod,
     fusion_method_kwargs: dict | None = None,
     max_n_detections: int | None = None,
-    n_workers: int | None = -1,  # number of workers for joblib.Parallel
+    n_workers: int | None = -1,  
 ) -> xr.Dataset:
-    """Fuse ensemble detections across models using WBF.
+    """Fuse ensemble detections across models using the selected method.
 
     You can set a max_n_detections if upper bound is known a prior to
-    reduce memory usage.
+    reduce memory usage. ``n_workers`` is the number of workers for joblib.Parallel.
 
     """
     # Check if image_width_height defined in dataset
@@ -111,14 +85,13 @@ def fuse_detections(
             "attributes. Please ensure the dataset has 'image_shape' "
             "(width, height in pixels) in its attributes."
         )
-    else:
-        image_width_height = _validate_image_shape(image_shape)
+    image_width_height = _validate_image_shape(image_shape)
 
     # Compute upper bound of max_n_detections
     if not max_n_detections:
         max_n_detections = _estimate_max_n_detections(ensemble_detections_ds)
 
-    # Build single-image partial fusion function for the selected method
+    # Build single-image partial function for the selected fusion method
     if fusion_method not in VALID_FUSION_METHODS:
         raise ValueError(
             f"Invalid fusion method: {fusion_method}. "
@@ -129,10 +102,7 @@ def fuse_detections(
         _fuse_single_image_detections, fusion_function
     )
 
-    # Run fusion across image_id
-    # if n_workers is None:
-    #     n_workers = -1
-
+    # Parallelise fusion across image_id
     results_per_img_id = Parallel(n_jobs=n_workers)(
         delayed(_fuse_single_image_detections_partial)(
             ensemble_detections_ds.position.sel(image_id=img_id).values,
@@ -147,28 +117,45 @@ def fuse_detections(
     )
 
     # Postprocess data arrays
-    fused_data_arrays = _postprocess_multi_image_fused_arrays(
+    fused_detections_ds = _postprocess_multi_image_fused_arrays(
         results_per_img_id, ensemble_detections_ds.image_id
     )
 
-    # Return a dataset
-    return xr.Dataset(data_vars=fused_data_arrays)
+    return fused_detections_ds
 
 
-def _validate_image_shape(image_shape) -> np.ndarray:
-    """Validate and convert image shape to numpy array.
+# ------- Multi image fusion ------------------
 
-    Args:
-        image_shape: Image dimensions as (width, height).
-            Should be array-like with 2 elements.
+@_check_output(ValidBboxDetectionsDataset)
+def _postprocess_multi_image_fused_arrays(
+    results_per_img_id: list[TupleFourDataArrays],
+    list_img_id: list,
+) -> xr.Dataset:
+    """Postprocess fused data arrays on multiple images after fusion.
 
-    Returns:
-        np.ndarray: Validated image shape as 1D array with 2 elements.
+    Concatenate along image_id, drop all-nan padding in id and fill nan labels with -1.
+    """
+    # Transpose results from list-of-tuples to tuple-of-lists
+    da_names = ("position", "shape", "confidence", "label")
+    da_lists = zip(*results_per_img_id)
 
-    Raises:
-        ValueError: If image_shape cannot be converted to a valid shape.
+    # Concatenate lists of dataarrays along image_id dimension and
+    # remove extra padding in "id" dimension
+    fused_da_dict = {}
+    for da_str, list_da in zip(da_names, da_lists, strict=True):
+        fused_da_dict[da_str] = xr.concat(
+            list_da, pd.Index(list_img_id, name="image_id")
+        ).dropna(dim="id", how="all")
 
-    """
+    # Pad labels with -1 rather than nan
+    fused_da_dict["label"] = fused_da_dict["label"].fillna(-1).astype(int)
+
+    return xr.Dataset(data_vars=fused_da_dict)
+
+
+def _validate_image_shape(image_shape) -> np.ndarray:
+    """Validate and cast image shape as numpy array."""
+    # Try casting as numpy array
     try:
         image_shape = np.asarray(image_shape)
     except (TypeError, ValueError) as e:
@@ -177,20 +164,22 @@ def _validate_image_shape(image_shape) -> np.ndarray:
             "Expected format: (width, height) as tuple or array-like."
         ) from e
 
-    # Flatten to handle (2,), (1,2) and (2,1) shapes
-    image_shape = image_shape.flatten()
-    if image_shape.shape != (2,):
+    # Check number of elements in array
+    if image_shape.size != 2:
         raise ValueError(
             f"'image_shape' must have exactly 2 elements (width, height), "
             f"got shape {image_shape.shape}"
         )
-
     return image_shape
 
 
 @_check_input(ValidBboxDetectionsEnsembleDataset)
 def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int:
-    """Get upper bound for maximum number of boxes per image after fusion."""
+    """Get upper bound for maximum number of boxes per image after fusion.
+    
+    We assume no detections are fused and all images have as many detections as the maximum
+    number of non-nan detections per image.
+    """
     detections_w_non_nan_position = (
         ensemble_detections_ds.position.notnull().all(dim="space")
     )  # True if non-nan x and y
@@ -202,6 +191,51 @@ def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int:
     )
 
 
+# ------- Single image fusion ------------------
+
+
+def _fuse_single_image_detections(
+    fusion_function: Callable,
+    position: np.ndarray,
+    shape: np.ndarray,
+    confidence: np.ndarray,
+    label: np.ndarray,
+    image_width_height: np.ndarray,
+    max_n_detections: int,
+    **fusion_kwargs: Unpack[_TypeFusionMethodKwargs],  #  method-only kwargs
+) -> TupleFourDataArrays:
+    """Fuse detections across models for a single image using selected method."""
+    # Prepare single image arrays for fusion
+    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
+        _preprocess_single_image_detections(
+            position, shape, confidence, label, image_width_height
+        )
+    )
+
+    # Run fusion method on one image
+    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
+        fusion_function(
+            list_bboxes_per_model,
+            list_confidence_per_model,
+            list_label_per_model,
+            **fusion_kwargs,
+        )
+    )
+
+    # Format output as xarray dataarrays
+    centroid_da, shape_da, confidence_da, label_da = (
+        _postprocess_single_image_detections(
+            ensemble_x1y1_x2y2_norm,
+            ensemble_scores,
+            ensemble_labels,
+            image_width_height,
+            max_n_detections,
+        )
+    )
+
+    return centroid_da, shape_da, confidence_da, label_da
+
+
 def _preprocess_single_image_detections(
     position: xr.DataArray,
     shape: xr.DataArray,
@@ -214,9 +248,14 @@ def _preprocess_single_image_detections(
     # transform position and shape arrays to x1y1x2y normalised
     bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None]
     bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None]
-    bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2])
-    # shape: 4, max_n_annotations_per_frame, n_models
+    bboxes_x1y1_x2y2_normalised = np.transpose(
+        np.concat(
+            [bboxes_x1y1, bboxes_x2y2]
+        ),  # shape: 4, max_n_annotations_per_frame, n_models
+        (1, 0, 2),  # shape: max_n_annotations_per_frame, 4, n_models
+    )
 
+    # --------------------
     # Get list of bboxes per model
     # arrays need to be tall for fusion methods
     n_models = bboxes_x1y1_x2y2_normalised.shape[-1]
@@ -230,36 +269,31 @@ def _preprocess_single_image_detections(
     list_label_per_model = [
         arr.squeeze() for arr in np.split(label, n_models, axis=-1)
     ]
+    # --------------------
 
-    # Remove rows with nan coordinates
-    list_bboxes_per_model = [
-        arr[:, ~np.any(np.isnan(arr), axis=0)].T
-        for arr in list_bboxes_per_model
-    ]
-    list_confidence_per_model = [
-        conf_arr[: bbox_arr.shape[0]]
-        for bbox_arr, conf_arr in zip(
+    # Remove rows with nan coordinates and return lists of arrays
+    list_non_nan_bboxes_per_model = [
+        sum(~np.any(np.isnan(arr), axis=1)) for arr in list_bboxes_per_model
+    ]  
+    return (
+        _chop_end_of_array(list_arrays_per_model, list_non_nan_bboxes_per_model)
+        for list_arrays_per_model in [
             list_bboxes_per_model,
             list_confidence_per_model,
-            strict=True,
-        )
-    ]
-    list_label_per_model = [
-        label_arr[: bbox_arr.shape[0]]
-        for bbox_arr, label_arr in zip(
-            list_bboxes_per_model,
             list_label_per_model,
-            strict=True,
-        )
-    ]
-
-    return (
-        list_bboxes_per_model,
-        list_confidence_per_model,
-        list_label_per_model,
+        ]
     )
 
 
+def _chop_end_of_array(
+    list_arrays: list[np.ndarray], list_end_lengths: list[int]
+) -> list[np.ndarray]:
+    """Chop end of arrays in list to the desired length along the first dimension."""
+    return [
+        arr[:n] for arr, n in zip(list_arrays, list_end_lengths, strict=True)
+    ]
+
+
 def _postprocess_single_image_detections(
     ensemble_x1y1_x2y2_norm,
     ensemble_scores,
@@ -276,89 +310,44 @@ def _postprocess_single_image_detections(
         image_width_height, (1, 2)
     )
 
-    # Combine x1y1, x2y2, scores and labels in one array
-    ensemble_data = np.c_[ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels]
-
-    # Remove rows with nan coordinates
-    ensemble_data = ensemble_data[
-        ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1)
-    ]
-
-    # Check padding
-    if ensemble_data.shape[0] > max_n_detections:
+    # Get 1d array for non-nan boxes
+    bool_non_nan_array = ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1)
+    n_non_nan_boxes = bool_non_nan_array.sum()
+    if n_non_nan_boxes > max_n_detections:
         raise ValueError(
             "Insufficient padding provided. "
             "The estimated maximum number of detections per image was set to "
             f"{max_n_detections}, "
-            f"but {ensemble_data.shape[0]} detections were "
+            f"but {n_non_nan_boxes} detections were "
             "found in one of the images after fusion. Please increase the "
             "maximum number of detections per image."
         )
 
-    # Pad combined array to max_n_detections
-    # (this is required to concatenate across image_ids)
-    ensemble_data = np.pad(
-        ensemble_data,
-        (
-            (0, max_n_detections - ensemble_data.shape[0]),
-            (0, 0),
+    # Retain non-nan boxes only and pad each array
+    return _parse_single_image_detections_as_dataarrays(
+        *(
+            _remove_nan_and_pad_to_max(
+                arr, bool_non_nan_array, max_n_detections
+            )
+            for arr in (ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels)
         ),
-        "constant",
-        constant_values=np.nan,
-    )
-
-    # Format output as xarray dataarrays
-    centroid_da, shape_da, confidence_da, label_da = (
-        _parse_single_image_detections_as_dataarrays(
-            ensemble_data[:, 0:4],
-            ensemble_data[:, 4],
-            ensemble_data[:, 5],
-        )
-    )
-
-    return centroid_da, shape_da, confidence_da, label_da
-
-
-def _fuse_single_image_detections(
-    fusion_function: Callable,
-    position: np.ndarray,
-    shape: np.ndarray,
-    confidence: np.ndarray,
-    label: np.ndarray,
-    image_width_height: np.ndarray,
-    max_n_detections: int,
-    **fusion_kwargs: Unpack[_TypeFusionMethodKwargs],  #  method-only kwargs
-) -> TupleFourDataArrays:
-    """Fuse detections across models for a single image using selected method."""
-    # Prepare single image arrays for fusion
-    list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
-        _preprocess_single_image_detections(
-            position, shape, confidence, label, image_width_height
-        )
     )
 
-    # Run fusion method on one image
-    ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = (
-        fusion_function(
-            list_bboxes_per_model,
-            list_confidence_per_model,
-            list_label_per_model,
-            **fusion_kwargs,
-        )
-    )
 
-    # Format output as xarray dataarrays
-    centroid_da, shape_da, confidence_da, label_da = (
-        _postprocess_single_image_detections(
-            ensemble_x1y1_x2y2_norm,
-            ensemble_scores,
-            ensemble_labels,
-            image_width_height,
-            max_n_detections,
-        )
+def _remove_nan_and_pad_to_max(
+    input_array, mask_non_nan_rows, max_n_detections, fill_value=np.nan
+):
+    """Keep the rows flagged as non-nan and pad with fill_value along first dimension."""
+    # Initialise array with nans
+    padded_array = np.full(
+        (max_n_detections, *input_array.shape[1:]),
+        fill_value,
+        dtype=input_array.dtype,
     )
-
-    return centroid_da, shape_da, confidence_da, label_da
+    # Replace top "mask_non_nan_rows.sum()" chunk with non-nan values from
+    # input array
+    padded_array[: mask_non_nan_rows.sum()] = input_array[mask_non_nan_rows]
+    return padded_array
 
 
 def _parse_single_image_detections_as_dataarrays(
@@ -392,55 +381,3 @@ def _parse_single_image_detections_as_dataarrays(
         xr.DataArray(scores_array, dims=["id"], coords=id_coords),
         xr.DataArray(labels_array, dims=["id"], coords=id_coords),
     )
-
-
-@_check_output(ValidBboxDetectionsDataset)
-def _postprocess_multi_image_fused_arrays(
-    results_per_img_id: list[TupleFourDataArrays],
-    list_img_id: list,
-) -> xr.Dataset:
-    """Postprocess fused data arrays on multiple images after fusion.
-
-    Fix padding and assign id coordinates.
-    """
-    # Parse results from joblib
-    # (output from joblib is a list of n = n_images, each element
-    # containing a tuple of data arrays)
-    list_da_dict = {}
-    (
-        list_da_dict["position"],
-        list_da_dict["shape"],
-        list_da_dict["confidence"],
-        list_da_dict["label"],
-    ) = list(zip(*results_per_img_id))
-
-    # Concatenate lists of dataarrays along image_id dimension
-    fused_da_dict = {}
-    for da_str, list_da in list_da_dict.items():
-        fused_da_dict[da_str] = xr.concat(
-            list_da,
-            pd.Index(list_img_id, name="image_id"),
-        )
-
-    # Remove extra padding in id dimension
-    fixed_padding_da_dict = {}
-    for da_str, da in fused_da_dict.items():
-        fixed_padding_da_dict[da_str] = da.dropna(dim="id", how="all")
-
-    # Pad labels with -1 rather than nan
-    fixed_padding_da_dict["label"] = (
-        fixed_padding_da_dict["label"].fillna(-1).astype(int)
-    )
-
-    # Format as dataset
-    return xr.Dataset(data_vars=fixed_padding_da_dict)
-    # # Assign id coordinates to data arrays
-    # # (these are lost after apply_ufunc because exclude_dims is used)
-    # n_max_detections = fixed_padding_da_dict["position"].sizes["id"]
-    # id_coords = np.arange(n_max_detections)
-
-    # fixed_id_coord_da_dict = {}
-    # for da_str, da in fixed_padding_da_dict.items():
-    #     fixed_id_coord_da_dict[da_str] = da.assign_coords(id=id_coords)
-
-    # return fixed_padding_da_dict

From fb779ddaac4ad6a65909c5b85864be618ba3a7c1 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 13:16:59 +0000
Subject: [PATCH 27/39] Add centroid, shape to corner utils

---
 ethology/detectors/ensembles/fusion.py | 39 ++++++++++++++++----------
 ethology/detectors/ensembles/models.py | 12 ++++++--
 ethology/detectors/ensembles/utils.py  | 17 +++++++++++
 3 files changed, 50 insertions(+), 18 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 948ae776..7d2239fa 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -16,6 +16,10 @@
     ValidBboxDetectionsEnsembleDataset,
 )
 from ethology.validators.utils import _check_input, _check_output
+from ethology.detectors.ensembles.utils import (
+    centroid_shape_to_corners,
+    corners_to_centroid_shape,
+)
 
 # ------------------- Supported fusion methods ------------------
 # from ensemble_boxes
@@ -69,7 +73,7 @@ def fuse_detections(
     fusion_method: TypeFusionMethod,
     fusion_method_kwargs: dict | None = None,
     max_n_detections: int | None = None,
-    n_workers: int | None = -1,  
+    n_workers: int | None = -1,
 ) -> xr.Dataset:
     """Fuse ensemble detections across models using the selected method.
 
@@ -126,6 +130,7 @@ def fuse_detections(
 
 # ------- Multi image fusion ------------------
 
+
 @_check_output(ValidBboxDetectionsDataset)
 def _postprocess_multi_image_fused_arrays(
     results_per_img_id: list[TupleFourDataArrays],
@@ -176,7 +181,7 @@ def _validate_image_shape(image_shape) -> np.ndarray:
 @_check_input(ValidBboxDetectionsEnsembleDataset)
 def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int:
     """Get upper bound for maximum number of boxes per image after fusion.
-    
+
     We assume no detections are fused and all images have as many detections as the maximum
     number of non-nan detections per image.
     """
@@ -246,8 +251,9 @@ def _preprocess_single_image_detections(
     """Prepare detections of an ensemble on a single image for fusion."""
     # Prepare boxes array
     # transform position and shape arrays to x1y1x2y normalised
-    bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None]
-    bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None]
+    x1y1, x2y2 = centroid_shape_to_corners(position, shape)
+    bboxes_x1y1 = x1y1 / image_width_height[:, None, None]
+    bboxes_x2y2 = x2y2 / image_width_height[:, None, None]
     bboxes_x1y1_x2y2_normalised = np.transpose(
         np.concat(
             [bboxes_x1y1, bboxes_x2y2]
@@ -259,7 +265,7 @@ def _preprocess_single_image_detections(
     # Get list of bboxes per model
     # arrays need to be tall for fusion methods
     n_models = bboxes_x1y1_x2y2_normalised.shape[-1]
-    list_bboxes_per_model = [
+    list_x1y1_x2y2_norm_per_model = [
         arr.squeeze()
         for arr in np.split(bboxes_x1y1_x2y2_normalised, n_models, axis=-1)
     ]
@@ -273,12 +279,15 @@ def _preprocess_single_image_detections(
 
     # Remove rows with nan coordinates and return lists of arrays
     list_non_nan_bboxes_per_model = [
-        sum(~np.any(np.isnan(arr), axis=1)) for arr in list_bboxes_per_model
-    ]  
+        sum(~np.any(np.isnan(arr), axis=1))
+        for arr in list_x1y1_x2y2_norm_per_model
+    ]
     return (
-        _chop_end_of_array(list_arrays_per_model, list_non_nan_bboxes_per_model)
+        _chop_end_of_array(
+            list_arrays_per_model, list_non_nan_bboxes_per_model
+        )
         for list_arrays_per_model in [
-            list_bboxes_per_model,
+            list_x1y1_x2y2_norm_per_model,
             list_confidence_per_model,
             list_label_per_model,
         ]
@@ -361,8 +370,10 @@ def _parse_single_image_detections_as_dataarrays(
         n_detections = x1y1_x2y2_array.shape[0]
         id_array = np.arange(n_detections)
 
-    # Extract bbox corner coordinates
-    x1y1, x2y2 = x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4]
+    # Extract bbox centre and shape
+    centroid, shape = corners_to_centroid_shape(
+        x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4]
+    )
 
     # Shared coordinates
     id_coords = {"id": id_array}
@@ -371,13 +382,11 @@ def _parse_single_image_detections_as_dataarrays(
     # Build all DataArrays
     return (
         xr.DataArray(
-            (0.5 * (x1y1 + x2y2)).T,
+            centroid.T,
             dims=["space", "id"],
             coords=spatial_id_coords,
         ),
-        xr.DataArray(
-            (x2y2 - x1y1).T, dims=["space", "id"], coords=spatial_id_coords
-        ),
+        xr.DataArray(shape.T, dims=["space", "id"], coords=spatial_id_coords),
         xr.DataArray(scores_array, dims=["id"], coords=id_coords),
         xr.DataArray(labels_array, dims=["id"], coords=id_coords),
     )
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 03d20211..11fb5494 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -11,7 +11,10 @@
 from lightning import LightningModule
 from torchvision.models import detection, get_model, list_models
 
-from ethology.detectors.ensembles.utils import pad_to_max_first_dimension
+from ethology.detectors.ensembles.utils import (
+    corners_to_centroid_shape,
+    pad_to_max_first_dimension,
+)
 from ethology.validators.detections import ValidBboxDetectionsEnsembleDataset
 from ethology.validators.utils import _check_output
 
@@ -177,8 +180,11 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
         # arrays of shape (image_id, 4/1, n_max_detections, n_models)
 
         # Compute centroid and shape arrays
-        centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4])
-        shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2]
+        # centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4])
+        # shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2]
+        centroid_array, shape_array = corners_to_centroid_shape(
+            bboxes_array[:, 0:2], bboxes_array[:, 2:4]
+        )
 
         # Return as ethology detections dataset
         max_n_detections = bboxes_array.shape[-2]
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
index 0ab1e2f8..03ff3b2d 100644
--- a/ethology/detectors/ensembles/utils.py
+++ b/ethology/detectors/ensembles/utils.py
@@ -23,3 +23,20 @@ def pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
         for arr in list_arrays
     ]
     return list_arrays_padded
+
+
+def centroid_shape_to_corners(position, shape):
+    """Convert centroid and shape arrays to x1y1, x2y2 corner arrays."""
+    half_shape = shape / 2
+    return (
+        position - half_shape,  # x1y1
+        position + half_shape,  # x2y2
+    )
+
+
+def corners_to_centroid_shape(x1y1, x2y2):
+    """Convert x1y1, x2y2 corner arrays to centroid and shape arrays."""
+    return (
+        0.5 * (x1y1 + x2y2),  # centroid
+        x2y2 - x1y1,  # shape
+    )

From 8d9a6dfe15ce96053aea4b768dbc3d9d76654cf1 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Thu, 4 Dec 2025 13:17:21 +0000
Subject: [PATCH 28/39] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/detectors/ensembles/fusion.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 7d2239fa..2b0801ac 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -11,15 +11,15 @@
 from joblib import Parallel, delayed
 from tqdm import tqdm
 
+from ethology.detectors.ensembles.utils import (
+    centroid_shape_to_corners,
+    corners_to_centroid_shape,
+)
 from ethology.validators.detections import (
     ValidBboxDetectionsDataset,
     ValidBboxDetectionsEnsembleDataset,
 )
 from ethology.validators.utils import _check_input, _check_output
-from ethology.detectors.ensembles.utils import (
-    centroid_shape_to_corners,
-    corners_to_centroid_shape,
-)
 
 # ------------------- Supported fusion methods ------------------
 # from ensemble_boxes

From c3ab6891e6cf5747e4c770f6585f0ba4083c65d6 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 16:11:03 +0000
Subject: [PATCH 29/39] Make format_predictions a staticmethod

---
 ethology/detectors/ensembles/models.py |  29 ++-
 examples/ensemble_of_detectors.py      | 333 +++++++++++++++++++++++++
 2 files changed, 356 insertions(+), 6 deletions(-)
 create mode 100644 examples/ensemble_of_detectors.py

diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 11fb5494..1830fc71 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -9,6 +9,7 @@
 import xarray as xr
 import yaml
 from lightning import LightningModule
+from torch.nn.parallel import parallel_apply
 from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import (
@@ -112,10 +113,19 @@ def predict_step(self, batch, batch_idx):
         """Predict step for a single batch."""
         # Run all models in ensemble in GPU
         images_batch, _annotations_batch = batch
+        # # -----------------------------------
         raw_prediction_dicts_per_model = [
             model(images_batch) for model in self.list_models
         ]  # [num_models][batch_size]
 
+        # Run all models in parallel on this GPU
+        # inputs = [(images_batch,)] * len(self.list_models[:3])
+        # raw_prediction_dicts_per_model = parallel_apply(
+        #     modules=self.list_models, #-----
+        #     inputs=[(images_batch,)] * len(self.list_models),
+        # )
+        # # -----------------------------------
+
         # Transpose to [batch_size][num_models] for easier downstream
         # processing
         raw_prediction_dicts_per_sample = [
@@ -127,16 +137,23 @@ def predict_step(self, batch, batch_idx):
 
         return raw_prediction_dicts_per_sample
 
+    @staticmethod
     @_check_output(ValidBboxDetectionsEnsembleDataset)
-    def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
-        """Format as ethology detections dataset with model axis."""
+    def format_predictions(
+        predictions: list[dict], attrs: dict | None = None
+    ) -> xr.Dataset:
+        """Format as ethology detections dataset with model axis.
+
+        predictions: per-batch prediction dicts, indexed as [batch][sample][model].
+        """
         # Get results from trainer
-        raw_predictions_per_model = self.trainer.predict_loop.predictions
+        # raw_predictions_per_model = self.trainer.predict_loop.predictions
 
         # Flatten batches
         raw_prediction_dicts_per_sample = list(
-            chain.from_iterable(raw_predictions_per_model)
+            chain.from_iterable(predictions)
         )  # [sample][model]
+        n_models = len(raw_prediction_dicts_per_sample[0])
 
         # Parse output from dicts
         output_per_sample: dict[str, list] = {
@@ -146,7 +163,7 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
         }
         for ky in output_per_sample:
             output_per_sample[ky] = [
-                [sample[m][ky] for m in range(len(self.list_models))]
+                [sample[m][ky] for m in range(n_models)]
                 for sample in raw_prediction_dicts_per_sample
             ]  # [sample][model]
 
@@ -204,7 +221,7 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset:
                 "image_id": np.arange(n_images),
                 "space": ["x", "y"],
                 "id": np.arange(max_n_detections),
-                "model": np.arange(len(self.list_models)),
+                "model": np.arange(n_models),
             },
             attrs=attrs if attrs else {},
         )
diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
new file mode 100644
index 00000000..86911633
--- /dev/null
+++ b/examples/ensemble_of_detectors.py
@@ -0,0 +1,333 @@
+"""Evaluating ensemble of trained detectors."""
+# %%
+# imports
+
+from pathlib import Path
+
+import numpy as np
+import torch
+import torchvision.transforms.v2 as transforms
+import xarray as xr
+import yaml
+from lightning import Trainer
+from matplotlib import pyplot as plt
+from torch.utils.data import DataLoader
+from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
+
+from ethology.detectors.ensembles.fusion import fuse_detections
+from ethology.detectors.ensembles.models import EnsembleDetector
+from ethology.detectors.evaluate import compute_precision_recall_ds
+from ethology.io.annotations import load_bboxes
+
+# %%
+# %matplotlib widget
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+
+
+# Helper functions
+def create_coco_dataset(
+    images_dir: str | Path,
+    annotations_file: str | Path,
+    composed_transform: transforms.Compose,
+) -> CocoDetection:
+    """Create a COCO dataset for object detection.
+
+    Note: transforms are applied to the full dataset. If the dataset
+    is later split, all splits will have the same transforms.
+    """
+    dataset_coco = CocoDetection(
+        root=images_dir,
+        annFile=annotations_file,
+        transforms=composed_transform,
+    )
+
+    # wrap dataset for transforms v2
+    dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco)
+
+    return dataset_transformed
+
+
+def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
+    """Collate function for dataloader with varying number of bounding boxes.
+
+    A custom function is needed for detection
+    because the number of bounding boxes varies
+    between images of the same batch.
+    See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop
+
+    Parameters
+    ----------
+    batch : tuple
+        a sequence of length = batch size, made up of (image, annotations)
+        tuples, one per sample.
+
+    Returns
+    -------
+    tuple
+        a tuple of 2 tuples, the first one holding all images in the batch,
+        and the second one holding the corresponding annotations.
+
+    """
+    return tuple(zip(*batch, strict=True))
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Input data
+
+dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full")
+images_dir = dataset_dir / "frames"
+annotations_dir = dataset_dir / "annotations"
+annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json"
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Define a dataloader
+# Define transforms for inference
+inference_transforms = transforms.Compose(
+    [
+        transforms.ToImage(),
+        transforms.ToDtype(torch.float32, scale=True),
+    ]
+)
+
+# Create COCO dataset
+# TODO: convert from ethology detections dataset to COCO dataset
+# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
+dataset_coco = create_coco_dataset(
+    images_dir=Path(dataset_dir) / "frames",
+    annotations_file=annotations_file_path,
+    composed_transform=inference_transforms,
+)
+
+# dataloader
+dataloader = DataLoader(
+    dataset_coco,
+    batch_size=12,  # 12,
+    shuffle=False,
+    num_workers=8,  # 4
+    collate_fn=collate_fn_varying_n_bboxes,
+    persistent_workers=True,
+    # pin_memory=True,  # <-- Faster CPU->GPU transfer
+    # because we guarantee a physical address for the data
+    # in memory, so we can use DMA that directly takes it to
+    # the GPU
+    # prefetch_factor=4,  # <-- Prefetch more batches
+    # multiprocessing_context="fork"
+    # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available()
+    # else None,  # see https://github.com/pytorch/pytorch/issues/87688
+)
+
+# %%
+# TODO: dataloader to ethology detections dataset
+gt_bboxes_ds = load_bboxes.from_files(
+    annotations_file_path, format="COCO", images_dirs=images_dir
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Define a YAML config file for the ensemble of trained detectors
+experiment_ID = "617393114420881798"
+ml_runs_experiment_dir = (
+    Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID
+)
+last_ckpt = Path("checkpoints") / "last.ckpt"
+
+config = {
+    "models": {
+        "model_class": "fasterrcnn_resnet50_fpn_v2",
+        # imported from torchvision.models.detection
+        "model_kwargs": {
+            "num_classes": 2,
+            "weights": None,  # null in YAML becomes None in Python
+            "weights_backbone": None,
+        },
+        "checkpoints": [
+            str(
+                ml_runs_experiment_dir
+                / "f348d9d196934073bece1b877cbc4d38"
+                / last_ckpt
+            ),  # above_0th
+            str(
+                ml_runs_experiment_dir
+                / "879d2f77e2b24adcb06b87d2fede6a04"
+                / last_ckpt
+            ),  # above_1st
+            str(
+                ml_runs_experiment_dir
+                / "75583ec227e3444ab692b99c64795325"
+                / last_ckpt
+            ),  # above_5th
+            str(
+                ml_runs_experiment_dir
+                / "4acc37206b1e4f679d535c837bee2c2f"
+                / last_ckpt
+            ),  # above_10th
+            str(
+                ml_runs_experiment_dir
+                / "fdcf88fcbcc84fbeb94b45ca6b6f8914"
+                / last_ckpt
+            ),  # above_25th
+            str(
+                ml_runs_experiment_dir
+                / "daa05ded0ea047388c9134bf044061c5"
+                / last_ckpt
+            ),  # above_50th
+        ],
+    },
+    "fusion": {
+        "method": "weighted_boxes_fusion",
+        # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted"
+        "method_kwargs": {
+            # arguments as in ensemble_boxes.weighted_boxes_fusion
+            "iou_thr": 0.5,  # iou threshold for the ensemble
+            "skip_box_thr": 0.0001,
+        },
+        "n_jobs": -1,  # workers for joblib.Parallel,
+        # n_workers should be <= number of CPU cores
+        # follows joblib n_jobs
+        # if -1: all are used
+        # if None: same as 1
+        # "confidence_threshold_post_fusion": 0.0,
+        "max_n_detections": 300,
+    },
+}
+config_file = "ensemble_of_detectors.yaml"
+with open(config_file, "w") as f:
+    yaml.dump(config, f, sort_keys=False)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Load the ensemble of detectors
+ensemble_detector = EnsembleDetector(config_file)
+print(f"Ensemble detector is on device: {ensemble_detector.device}")
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Run the ensemble of detectors on a dataset
+# Use Trainer for inference (this sets the device flexibly)
+
+# With multiple devices:
+# Lightning handles the "main" device (so still devices=1),
+# while code internally distributes models across GPUs using parallel_apply.
+trainer = Trainer(
+    accelerator="gpu",
+    devices=1,
+    logger=False,
+    precision="16-mixed",  # --- results change
+    # strategy = 'ddp' ?
+)
+predictions = trainer.predict(ensemble_detector, dataloader)
+
+
+# %%
+# Format predictions as ethology detections dataset and add attrs
+# TODO: think about syntax of format_predictions (should it be instance or
+# static method instead?)
+ensemble_detections_ds = ensemble_detector.format_predictions(
+    predictions=predictions,
+    attrs=gt_bboxes_ds.attrs
+)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Fuse detections across models using selected method
+image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
+ensemble_detections_ds.attrs["image_shape"] = image_width_height
+config_fusion: dict = config["fusion"]
+
+
+fused_detections_ds = fuse_detections(
+    ensemble_detections_ds,
+    fusion_method=config_fusion["method"],
+    fusion_method_kwargs=config_fusion["method_kwargs"],
+    # n_workers=config_fusion.get("n_jobs", 1),
+    # max_n_detections=config_fusion["max_n_detections"],
+    # should be larger than expected maximum number of detections after fusion
+    # ---- method kwargs ----
+)
+
+# %%
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Fuse detections across models with NMS
+
+# fused_detections_nms_ds = fuse_ensemble_detections(
+#     ensemble_detections_ds,
+#     fusion_method="soft_nms",
+#     fusion_method_kwargs={
+#         "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
+#         "sigma": 0.5,
+#         "thresh": 0.001,
+#     },
+#     max_n_detections=500,
+# )
+
+# fused_detections_ds = fused_detections_nms_ds
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Remove low confidence detections
+confidence_threshold_post_fusion = 0.4
+fused_detections_ds_ = fused_detections_ds.where(
+    fused_detections_ds.confidence >= confidence_threshold_post_fusion
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Evaluate the ensemble model
+# - load ground truth
+# - compute metrics
+
+# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
+
+iou_threshold_tp = 0.25
+fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds(
+    pred_bboxes_ds=fused_detections_ds_,
+    gt_bboxes_ds=gt_bboxes_ds,
+    iou_threshold=iou_threshold_tp,
+)
+
+# All models on full August dataset, without removing low
+# confidence detections:
+# confidence_threshold_post_fusion = 0.0
+# Precision: 0.5920
+# Recall: 0.8455
+# ---
+# confidence_threshold_post_fusion = 0.4
+# Precision: 0.8339
+# Recall: 0.7177
+# ---
+# confidence_threshold_post_fusion = 0.5
+# Precision: 0.8714
+# Recall: 0.6624
+# ---
+# confidence threshold post fusion: 0.40 AND mixed precision in trainer
+# Precision: 0.8336
+# Recall: 0.7162
+
+print(
+    "Ensemble model with confidence threshold post fusion: "
+    f"{confidence_threshold_post_fusion:.2f}"
+)
+print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}")
+print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}")
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Evaluate single models
+list_detections_ds_eval = []
+for k in range(ensemble_detections_ds.sizes["model"]):
+    # filter low confidence detections (for a fairer comparison)
+    detections_one_model = ensemble_detections_ds.where(
+        ensemble_detections_ds.confidence >= confidence_threshold_post_fusion
+    ).sel(model=k)
+
+    # evaluate
+    detections_ds, _ = compute_precision_recall_ds(
+        pred_bboxes_ds=detections_one_model,
+        gt_bboxes_ds=gt_bboxes_ds,
+        iou_threshold=iou_threshold_tp,
+    )
+    list_detections_ds_eval.append(detections_ds)
+
+    print(f"Model: {k}")
+    print(f"Precision: {detections_ds.precision.mean().values:.4f}")
+    print(f"Recall: {detections_ds.recall.mean().values:.4f}")
+    print("--------------------------------")
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

From d1532a883cfbf838a3546b420a8f5194c5bf6b0c Mon Sep 17 00:00:00 2001
From: Niko Sirmpilatze <niko.sirbiladze@gmail.com>
Date: Wed, 26 Nov 2025 19:15:04 +0000
Subject: [PATCH 30/39] Update supported Python versions to 3.11 - 3.13 (#120)

* update supported Python version to 3.11 - 3.13

* Use default python version for docs build

* Bring back Python version for build docs action

---------

Co-authored-by: sfmig <33267254+sfmig@users.noreply.github.com>
---
 .github/workflows/docs_build_and_deploy.yml | 3 ++-
 .github/workflows/test_and_deploy.yml       | 6 +++---
 .pre-commit-config.yaml                     | 3 ---
 CONTRIBUTING.md                             | 2 +-
 README.md                                   | 2 +-
 docs/source/environment.yml                 | 2 +-
 docs/source/installation.md                 | 2 +-
 pyproject.toml                              | 8 ++++----
 8 files changed, 13 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/docs_build_and_deploy.yml b/.github/workflows/docs_build_and_deploy.yml
index a60ab166..82200ca4 100644
--- a/.github/workflows/docs_build_and_deploy.yml
+++ b/.github/workflows/docs_build_and_deploy.yml
@@ -37,11 +37,12 @@ jobs:
     steps:
       - uses: neuroinformatics-unit/actions/build_sphinx_docs@main
         with:
-          python-version: 3.12
+          python-version: 3.13 # default for the action is 3.x
           use-make: true
           fetch-tags: true
           use-artifactci: lazy
 
+
   deploy_sphinx_docs:
     name: Deploy Sphinx Docs
     needs: build_sphinx_docs
diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml
index e23c718a..df909c7c 100644
--- a/.github/workflows/test_and_deploy.yml
+++ b/.github/workflows/test_and_deploy.yml
@@ -32,14 +32,14 @@ jobs:
     strategy:
       matrix:
         # Run all supported Python versions on linux
-        python-version: ["3.10", "3.11", "3.12"]
+        python-version: ["3.11", "3.12", "3.13"]
         os: [ubuntu-latest]
         # Include one windows and macos run
         include:
         - os: macos-latest
-          python-version: "3.11"
+          python-version: "3.13"
         - os: windows-latest
-          python-version: "3.11"
+          python-version: "3.13"
 
     steps:
       # Run tests
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 82f7b937..d5863f9c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -55,6 +55,3 @@ repos:
     rev: v2.4.1
     hooks:
     - id: codespell
-      additional_dependencies:
-      # tomli dependency can be removed when we drop support for Python 3.10
-      - tomli
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 8be95ddc..a7c1d138 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -11,7 +11,7 @@ development environment. In the following, we assume you have
 To install `ethology` for development, first create and activate a `conda` environment:
 
 ```sh
-conda create -n ethology-dev python=3.12
+conda create -n ethology-dev python=3.13
 conda activate ethology-dev
 ```
 
diff --git a/README.md b/README.md
index addf5f8f..cf9b6c1f 100644
--- a/README.md
+++ b/README.md
@@ -15,7 +15,7 @@ Mix-and-match computer vision tools for your animal behaviour analysis.
 
 Create a conda environment and install the package
 ```sh
-conda create -n ethology-env python=3.12 -y
+conda create -n ethology-env python=3.13 -y
 conda activate ethology-env
 pip install ethology
 ```
diff --git a/docs/source/environment.yml b/docs/source/environment.yml
index 3c0494e6..1c1bd064 100644
--- a/docs/source/environment.yml
+++ b/docs/source/environment.yml
@@ -3,7 +3,7 @@
 channels:
   - conda-forge
 dependencies:
-  - python=3.12
+  - python=3.13
   - pytables
   # - pip:
   #   - ethology
diff --git a/docs/source/installation.md b/docs/source/installation.md
index 327f5dda..760912f0 100644
--- a/docs/source/installation.md
+++ b/docs/source/installation.md
@@ -21,7 +21,7 @@ git clone https://github.com/neuroinformatics-unit/ethology.git
 
 Then create a conda environment and install the package from source
 ```sh
-conda create -n ethology-env python=3.12 -y
+conda create -n ethology-env python=3.13 -y
 conda activate ethology-env
 cd ethology
 pip install .
diff --git a/pyproject.toml b/pyproject.toml
index ad9b345e..4a7b7b63 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -3,7 +3,7 @@ name = "ethology"
 authors = [{ name = "Adam Tyson", email = "code@adamltyson.com" }]
 description = "Data processing tools for animal behavioural analysis"
 readme = "README.md"
-requires-python = ">=3.10.0"
+requires-python = ">=3.11.0"
 dynamic = ["version"]
 
 license = { text = "BSD-3-Clause" }
@@ -12,9 +12,9 @@ classifiers = [
   "Development Status :: 2 - Pre-Alpha",
   "Programming Language :: Python",
   "Programming Language :: Python :: 3",
-  "Programming Language :: Python :: 3.10",
   "Programming Language :: Python :: 3.11",
   "Programming Language :: Python :: 3.12",
+  "Programming Language :: Python :: 3.13",
   "Operating System :: OS Independent",
   "License :: OSI Approved :: BSD License",
 ]
@@ -133,14 +133,14 @@ check-hidden = true
 [tool.tox]
 legacy_tox_ini = """
 [tox]
-envlist = py{310,311,312}
+envlist = py{311,312,313}
 isolated_build = True
 
 [gh-actions]
 python =
-    3.10: py310
     3.11: py311
     3.12: py312
+    3.13: py313
 
 [testenv]
 extras =

From 399bd246208b5126e3694a207ff0c79decfb46cb Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Mon, 1 Dec 2025 19:55:20 +0000
Subject: [PATCH 31/39] [pre-commit.ci] pre-commit autoupdate (#121)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

updates:
- [github.com/astral-sh/ruff-pre-commit: v0.14.3 → v0.14.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.14.3...v0.14.7)
- [github.com/pre-commit/mirrors-mypy: v1.18.2 → v1.19.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.18.2...v1.19.0)

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .pre-commit-config.yaml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index d5863f9c..b1c797fc 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,12 +29,12 @@ repos:
       - id: rst-directive-colons
       - id: rst-inline-touching-normal
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.14.3
+    rev: v0.14.7
     hooks:
       - id: ruff
       - id: ruff-format
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v1.18.2
+    rev: v1.19.0
     hooks:
         - id: mypy
           additional_dependencies:

From a10a96d622e3e3408db868892263f2b4a610a761 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 16:41:15 +0000
Subject: [PATCH 32/39] Make constant attributes in validators class variables
 (#123)

* Classvars for validators WIP

* Make defaults class variables also for file validators

* Update docstrings

* Add uv.lock to .gitignore

* Clarify comments on class variables in validators to indicate they should not be modified after initialization.

* Remove class attribute from docstring (not numpy style)

* Fix excluded modules in API index generation script

* Add preliminary tests
---
 .gitignore                                    |  3 +
 docs/make_api_index.py                        |  2 +-
 ethology/validators/annotations.py            | 78 +++++++------------
 ethology/validators/detections.py             | 45 +++++------
 ethology/validators/utils.py                  | 42 +++++-----
 tests/test_unit/test_validators/test_utils.py | 65 ++++++++++++++++
 6 files changed, 141 insertions(+), 94 deletions(-)
 create mode 100644 tests/test_unit/test_validators/test_utils.py

diff --git a/.gitignore b/.gitignore
index 2a7466dd..53fa4926 100644
--- a/.gitignore
+++ b/.gitignore
@@ -78,6 +78,9 @@ venv/
 # pyenv
 .python-version
 
+# uv
+uv.lock
+
 # OS
 .DS_Store
 
diff --git a/docs/make_api_index.py b/docs/make_api_index.py
index 934a3886..64465e24 100644
--- a/docs/make_api_index.py
+++ b/docs/make_api_index.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 
 # Modules to exclude from the API index
-exclude_modules = ["ethology.io.annotations.json_schemas"]
+exclude_modules = ["ethology.validators.json_schemas"]
 
 # Set the current working directory to the directory of this script
 script_dir = Path(__file__).resolve().parent
diff --git a/ethology/validators/annotations.py b/ethology/validators/annotations.py
index 0ecb886c..427440c3 100644
--- a/ethology/validators/annotations.py
+++ b/ethology/validators/annotations.py
@@ -2,6 +2,7 @@
 
 import json
 from pathlib import Path
+from typing import ClassVar
 
 import pandas as pd
 import pandera.pandas as pa
@@ -29,9 +30,9 @@ class ValidVIA:
     ----------
     path : Path | str
         Path to the VIA JSON file, passed as an input.
-    schema : dict
+    schema : ClassVar[dict]
         The JSON schema is set to the default VIA schema.
-    required_keys : dict
+    required_keys : ClassVar[dict]
         The required keys for the VIA JSON file.
 
     Raises
@@ -49,21 +50,15 @@ class ValidVIA:
     """
 
     path: Path = field(converter=Path)
-    schema: dict = field(
-        default=_get_default_schema("VIA"),
-        init=False,
-    )
-    required_keys: dict = field(
-        default={
-            "main": ["_via_img_metadata", "_via_attributes"],
-            "images": ["filename"],
-            "regions": ["shape_attributes"],
-            "shape_attributes": ["x", "y", "width", "height"],
-        },
-        init=False,
-        # with init=False the attribute is always initialized
-        # with the default value
-    )
+
+    # class variables: should not be modified after initialization
+    schema: ClassVar[dict] = _get_default_schema("VIA")
+    required_keys: ClassVar[dict] = {
+        "main": ["_via_img_metadata", "_via_attributes"],
+        "images": ["filename"],
+        "regions": ["shape_attributes"],
+        "shape_attributes": ["x", "y", "width", "height"],
+    }
 
     # Note: the validators are applied in order
     @path.validator
@@ -121,9 +116,9 @@ class ValidCOCO:
     ----------
     path : Path | str
         Path to the COCO JSON file, passed as an input.
-    schema : dict
+    schema : ClassVar[dict]
         The JSON schema is set to the default COCO schema.
-    required_keys : dict
+    required_keys : ClassVar[dict]
         The required keys for the COCO JSON file.
 
     Raises
@@ -141,23 +136,15 @@ class ValidCOCO:
     """
 
     path: Path = field(converter=Path)
-    schema: dict = field(
-        default=_get_default_schema("COCO"),
-        init=False,
-        # with init=False the attribute is always initialized
-        # with the default value
-    )
 
-    # The keys of "required_keys" match the 1st level keys in a COCO JSON file
-    required_keys: dict = field(
-        default={
-            "main": ["images", "annotations", "categories"],
-            "images": ["id", "file_name", "width", "height"],
-            "annotations": ["id", "image_id", "bbox", "category_id"],
-            "categories": ["id", "name"],  # exclude "supercategory"
-        },
-        init=False,
-    )
+    # class variables: should not be modified after initialization
+    schema: ClassVar[dict] = _get_default_schema("COCO")
+    required_keys: ClassVar[dict] = {
+        "main": ["images", "annotations", "categories"],
+        "images": ["id", "file_name", "width", "height"],
+        "annotations": ["id", "image_id", "bbox", "category_id"],
+        "categories": ["id", "name"],  # exclude "supercategory"
+    }  # keys match the 1st level keys in a COCO JSON file
 
     # Note: the validators are applied in order
     @path.validator
@@ -241,10 +228,10 @@ class ValidBboxAnnotationsDataset(ValidDataset):
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
-    required_dims : set[str]
+    required_dims : ClassVar[set]
         The set of required dimension names: ``image_id``, ``space`` and
         ``id``.
-    required_data_vars : dict[str, set]
+    required_data_vars : ClassVar[dict[str, set]]
         A dictionary mapping data variable names to their required minimum
         dimensions:
 
@@ -267,17 +254,12 @@ class ValidBboxAnnotationsDataset(ValidDataset):
     """
 
     # Minimum requirements for a bbox dataset holding detections
-    required_dims: set = field(
-        default={"image_id", "space", "id"},
-        init=False,
-    )
-    required_data_vars: dict = field(
-        default={
-            "position": {"image_id", "space", "id"},
-            "shape": {"image_id", "space", "id"},
-        },
-        init=False,
-    )
+    # Should not be modified after initialization
+    required_dims: ClassVar[set] = {"image_id", "space", "id"}
+    required_data_vars: ClassVar[dict[str, set]] = {
+        "position": {"image_id", "space", "id"},
+        "shape": {"image_id", "space", "id"},
+    }
 
 
 class ValidBboxAnnotationsDataFrame(pa.DataFrameModel):
diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py
index a22dab62..87268bea 100644
--- a/ethology/validators/detections.py
+++ b/ethology/validators/detections.py
@@ -1,6 +1,8 @@
 """Validators for detection datasets."""
 
-from attrs import define, field
+from typing import ClassVar
+
+from attrs import define
 
 from ethology.validators.utils import ValidDataset
 
@@ -23,10 +25,10 @@ class ValidBboxDetectionsDataset(ValidDataset):
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
-    required_dims : set
+    required_dims : ClassVar[set]
         The set of required dimension names: ``image_id``, ``space`` and
         ``id``.
-    required_data_vars : dict[str, set]
+    required_data_vars : ClassVar[dict[str, set]]
         A dictionary mapping data variable names to their required minimum
         dimensions:
 
@@ -50,18 +52,13 @@ class ValidBboxDetectionsDataset(ValidDataset):
     """
 
     # Minimum requirements for a bbox dataset holding detections
-    required_dims: set = field(
-        default={"image_id", "space", "id"},
-        init=False,
-    )
-    required_data_vars: dict = field(
-        default={
-            "position": {"image_id", "space", "id"},
-            "shape": {"image_id", "space", "id"},
-            "confidence": {"image_id", "id"},
-        },
-        init=False,
-    )
+    # Should not be modified after initialization
+    required_dims: ClassVar[set] = {"image_id", "space", "id"}
+    required_data_vars: ClassVar[dict[str, set]] = {
+        "position": {"image_id", "space", "id"},
+        "shape": {"image_id", "space", "id"},
+        "confidence": {"image_id", "id"},
+    }
 
 
 @define
@@ -110,15 +107,9 @@ class ValidBboxDetectionsEnsembleDataset(ValidDataset):
     """
 
     # Minimum requirements for a bbox dataset holding detections
-    required_dims: set = field(
-        default={"image_id", "space", "id", "model"},
-        init=False,
-    )
-    required_data_vars: dict = field(
-        default={
-            "position": {"image_id", "space", "id", "model"},
-            "shape": {"image_id", "space", "id", "model"},
-            "confidence": {"image_id", "id", "model"},
-        },
-        init=False,
-    )
+    required_dims: ClassVar[set] = {"image_id", "space", "id", "model"}
+    required_data_vars: ClassVar[dict] = {
+        "position": {"image_id", "space", "id", "model"},
+        "shape": {"image_id", "space", "id", "model"},
+        "confidence": {"image_id", "id", "model"},
+    }
diff --git a/ethology/validators/utils.py b/ethology/validators/utils.py
index ce74a289..ce85ff0a 100644
--- a/ethology/validators/utils.py
+++ b/ethology/validators/utils.py
@@ -1,8 +1,9 @@
 """Utils for validating `ethology` objects."""
 
-from abc import ABC, abstractmethod
+from abc import ABC
 from collections.abc import Callable
 from functools import wraps
+from typing import ClassVar
 
 import xarray as xr
 from attrs import define, field
@@ -20,18 +21,18 @@ class ValidDataset(ABC):
     - has the correct dimensions for each data variable
 
     Subclasses must define ``required_dims`` and ``required_data_vars``
-    attributes.
+    class attributes.
 
     Attributes
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
-    required_dims : set[str]
-        A set of required dimension names. This attribute should be
+    required_dims : ClassVar[set[str]]
+        A set of required dimension names. This class attribute must be
         defined by any subclass inheriting from this class.
-    required_data_vars : dict[str, set]
+    required_data_vars : ClassVar[dict[str, set]]
         A dictionary mapping data variable names to their required dimensions.
-        This attribute should be defined by any subclass inheriting from
+        This class attribute must be defined by any subclass inheriting from
         this class.
 
     Raises
@@ -51,18 +52,23 @@ class ValidDataset(ABC):
 
     dataset: xr.Dataset = field()
 
-    # Subclasses should override these abstract properties
-    @property
-    @abstractmethod
-    def required_dims(self) -> set:
-        """Subclasses must provide a ``required_dims`` property."""
-        pass  # pragma: no cover
-
-    @property
-    @abstractmethod
-    def required_data_vars(self) -> dict[str, set]:
-        """Subclasses must provide a ``required_data_vars`` property."""
-        pass  # pragma: no cover
+    # class variables
+    required_dims: ClassVar[set]
+    required_data_vars: ClassVar[dict[str, set]]
+
+    def __init_subclass__(cls, **kwargs):
+        """Verify that subclasses define required class variables."""
+        super().__init_subclass__(**kwargs)
+
+        if not hasattr(cls, "required_dims"):
+            raise TypeError(
+                f"{cls.__name__} must define 'required_dims' class variable"
+            )
+        if not hasattr(cls, "required_data_vars"):
+            raise TypeError(
+                f"{cls.__name__} must define 'required_data_vars' "
+                "class variable"
+            )
 
     # Validators
     @dataset.validator
diff --git a/tests/test_unit/test_validators/test_utils.py b/tests/test_unit/test_validators/test_utils.py
new file mode 100644
index 00000000..ac91ddaa
--- /dev/null
+++ b/tests/test_unit/test_validators/test_utils.py
@@ -0,0 +1,65 @@
+import pytest
+from attrs import define
+
+from ethology.validators.utils import ValidDataset
+
+
+@pytest.mark.parametrize(
+    "missing_attr, expected_error_match",
+    [
+        (
+            "required_dims",
+            ".*must define 'required_dims' class variable",
+        ),
+        (
+            "required_data_vars",
+            ".*must define 'required_data_vars' class variable",
+        ),
+        (
+            "both",
+            ".*must define 'required_dims' class variable",
+        ),
+    ],
+    ids=[
+        "missing_required_dims",
+        "missing_required_data_vars",
+        "missing_both_class_vars",
+    ],
+)
+def test_subclass_missing_class_vars_raises_type_error(
+    missing_attr, expected_error_match
+):
+    """Test that subclasses without required class vars raise TypeError."""
+    with pytest.raises(TypeError, match=expected_error_match):
+        if missing_attr == "required_dims":
+
+            @define
+            class InvalidDataset(ValidDataset):
+                required_data_vars = {"position": {"x", "y"}}
+
+        elif missing_attr == "required_data_vars":
+
+            @define
+            class InvalidDataset(ValidDataset):
+                required_dims = {"x", "y"}
+
+        else:
+
+            @define
+            class InvalidDataset(ValidDataset):
+                pass
+
+
+def test_subclass_with_both_class_vars_does_not_raise():
+    """Test that a valid subclass with both class vars works correctly."""
+    required_dims_in = {"x", "y"}
+    required_data_vars_in = {"position": {"x", "y"}}
+
+    @define
+    class ValidCustomDataset(ValidDataset):
+        required_dims = required_dims_in
+        required_data_vars = required_data_vars_in
+
+    # Verify the class attributes
+    assert ValidCustomDataset.required_dims == required_dims_in
+    assert ValidCustomDataset.required_data_vars == required_data_vars_in

From 92028667db9da543d443dad882830ff49af578d0 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 17:25:10 +0000
Subject: [PATCH 33/39] Expand validator tests for ensemble validator

---
 ethology/validators/detections.py             |   9 +-
 .../test_validators/test_detections.py        | 238 ++++++++++++------
 2 files changed, 162 insertions(+), 85 deletions(-)

diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py
index 87268bea..a91be7a9 100644
--- a/ethology/validators/detections.py
+++ b/ethology/validators/detections.py
@@ -63,7 +63,7 @@ class ValidBboxDetectionsDataset(ValidDataset):
 
 @define
 class ValidBboxDetectionsEnsembleDataset(ValidDataset):
-    """Class for valid ``ethology`` bounding box ensembledetections datasets.
+    """Class for valid ``ethology`` bounding box ensemble detections datasets.
 
     This class validates that the input dataset:
 
@@ -80,10 +80,10 @@ class ValidBboxDetectionsEnsembleDataset(ValidDataset):
     ----------
     dataset : xarray.Dataset
         The xarray dataset to validate.
-    required_dims : set
+    required_dims : ClassVar[set]
         The set of required dimension names: ``image_id``, ``space``, ``id``
-         and ``model``.
-    required_data_vars : dict[str, set]
+        and ``model``.
+    required_data_vars : ClassVar[dict[str, set]]
         A dictionary mapping data variable names to their required minimum
         dimensions:
 
@@ -107,6 +107,7 @@ class ValidBboxDetectionsEnsembleDataset(ValidDataset):
     """
 
     # Minimum requirements for a bbox dataset holding detections
+    # Should not be modified after initialization
     required_dims: ClassVar[set] = {"image_id", "space", "id", "model"}
     required_data_vars: ClassVar[dict] = {
         "position": {"image_id", "space", "id", "model"},
diff --git a/tests/test_unit/test_validators/test_detections.py b/tests/test_unit/test_validators/test_detections.py
index d053d6ef..5a60da56 100644
--- a/tests/test_unit/test_validators/test_detections.py
+++ b/tests/test_unit/test_validators/test_detections.py
@@ -4,7 +4,10 @@
 import pytest
 import xarray as xr
 
-from ethology.validators.detections import ValidBboxDetectionsDataset
+from ethology.validators.detections import (
+    ValidBboxDetectionsDataset,
+    ValidBboxDetectionsEnsembleDataset,
+)
 
 
 @pytest.fixture
@@ -38,6 +41,28 @@ def valid_bbox_detections_dataset():
     return ds
 
 
+@pytest.fixture
+def valid_bbox_detections_ensemble_dataset(valid_bbox_detections_dataset):
+    """Create a valid bbox detections ensemble dataset for validation."""
+    # Add model dimension
+    ds = valid_bbox_detections_dataset.expand_dims(
+        model=["model_a", "model_b"]
+    )
+
+    return ds
+
+
+@pytest.fixture
+def valid_bbox_detections_ensemble_dataset_extra_vars_and_dims(
+    valid_bbox_detections_ensemble_dataset: xr.Dataset,
+) -> xr.Dataset:
+    ds = valid_bbox_detections_ensemble_dataset.copy(deep=True)
+    ds.coords["extra_dim"] = [10, 20, 30]
+    ds["extra_var_1"] = (["image_id"], np.random.rand(len(ds.image_id)))
+    ds["extra_var_2"] = (["id"], np.random.rand(len(ds.id)))
+    return ds
+
+
 @pytest.fixture
 def valid_bbox_detections_dataset_extra_vars_and_dims(
     valid_bbox_detections_dataset: xr.Dataset,
@@ -49,44 +74,71 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
     return ds
 
 
+# Define validator configurations
+VALIDATOR_CONFIGS: dict = {
+    "detections_ds": {
+        "validator_class": ValidBboxDetectionsDataset,
+        "valid_fixture": "valid_bbox_detections_dataset",
+        "valid_fixture_extra": (
+            "valid_bbox_detections_dataset_extra_vars_and_dims"
+        ),
+        "required_dims": {"image_id", "space", "id"},
+        "required_data_vars": {
+            "position": {"image_id", "space", "id"},
+            "shape": {"image_id", "space", "id"},
+            "confidence": {"image_id", "id"},
+        },
+    },
+    "ensemble_ds": {
+        "validator_class": ValidBboxDetectionsEnsembleDataset,
+        "valid_fixture": "valid_bbox_detections_ensemble_dataset",
+        "valid_fixture_extra": (
+            "valid_bbox_detections_ensemble_dataset_extra_vars_and_dims"
+        ),
+        "required_dims": {"image_id", "space", "id", "model"},
+        "required_data_vars": {
+            "position": {"image_id", "space", "id", "model"},
+            "shape": {"image_id", "space", "id", "model"},
+            "confidence": {"image_id", "id", "model"},
+        },
+    },
+}
+
+
+@pytest.mark.parametrize("validator_type", ["detections_ds", "ensemble_ds"])
+@pytest.mark.parametrize(
+    "valid_fixture_key",
+    [
+        "valid_fixture",
+        "valid_fixture_extra",
+    ],
+)
+def test_validator_bbox_detections_dataset_valid(
+    validator_type: str,
+    valid_fixture_key: str,
+    request: pytest.FixtureRequest,
+):
+    """Test bbox detections dataset validation with valid datasets."""
+    config = VALIDATOR_CONFIGS[validator_type]
+    fixture_name = config[valid_fixture_key]
+    dataset = request.getfixturevalue(fixture_name)
+
+    validator_class = config["validator_class"]
+    with does_not_raise():
+        validator = validator_class(dataset=dataset)
+
+    assert validator.dataset is dataset
+    assert validator.required_dims == config["required_dims"]
+    assert validator.required_data_vars == config["required_data_vars"]
+
+
+@pytest.mark.parametrize(
+    "validator",
+    [ValidBboxDetectionsDataset, ValidBboxDetectionsEnsembleDataset],
+)
 @pytest.mark.parametrize(
     "sample_dataset, expected_exception, expected_error_message",
     [
-        (
-            "valid_bbox_detections_dataset",
-            does_not_raise(),
-            "",
-        ),
-        (
-            "valid_bbox_detections_dataset_extra_vars_and_dims",
-            does_not_raise(),
-            "",
-        ),
-        (
-            xr.Dataset(
-                coords={
-                    "image_id": np.arange(3),
-                    "space": np.arange(2),
-                    "id": np.arange(2),
-                },
-                data_vars={
-                    "position": (
-                        ["image_id", "space", "id"],
-                        np.zeros((3, 2, 2)),
-                    ),
-                    "shape": (
-                        ["image_id", "space", "id", "foo"],
-                        np.zeros((3, 2, 2, 1)),
-                    ),
-                    "confidence": (
-                        ["image_id", "id"],
-                        np.zeros((3, 2)),
-                    ),
-                },
-            ),
-            does_not_raise(),
-            "",
-        ),
         (
             {"position": [1, 2, 3], "shape": [4, 5, 6]},
             pytest.raises(TypeError),
@@ -130,13 +182,56 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
             pytest.raises(ValueError),
             "Missing required data variables: ['confidence', 'shape']",
         ),
+    ],
+    ids=[
+        "invalid_type",
+        "invalid_missing_data_var",
+        "invalid_missing_multiple_data_vars",
+    ],
+)
+def test_validator_bbox_detections_dataset_invalid(
+    validator: type[ValidBboxDetectionsDataset]
+    | type[ValidBboxDetectionsEnsembleDataset],
+    sample_dataset: xr.Dataset,
+    expected_exception: pytest.raises,
+    expected_error_message: str,
+):
+    """Test bbox detections dataset validation with invalid inputs."""
+    # Run validation and check exception
+    with expected_exception as excinfo:
+        _validator = validator(dataset=sample_dataset)
+    if excinfo:
+        error_msg = str(excinfo.value)
+        assert error_msg in expected_error_message
+
+
+@pytest.mark.parametrize(
+    "validator",
+    [ValidBboxDetectionsDataset, ValidBboxDetectionsEnsembleDataset],
+)
+@pytest.mark.parametrize(
+    "sample_dataset, expected_exception, expected_error_message",
+    [
         (
             xr.Dataset(
-                coords={"image_id": np.arange(3), "id": np.arange(2)},
+                coords={
+                    "image_id": np.arange(3),
+                    "id": np.arange(2),
+                    "model": np.arange(2),
+                },
                 data_vars={
-                    "position": (["image_id", "id"], np.zeros((3, 2))),
-                    "shape": (["image_id", "id"], np.zeros((3, 2))),
-                    "confidence": (["image_id", "id"], np.zeros((3, 2))),
+                    "position": (
+                        ["image_id", "id", "model"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "shape": (
+                        ["image_id", "id", "model"],
+                        np.zeros((3, 2, 2)),
+                    ),
+                    "confidence": (
+                        ["image_id", "id", "model"],
+                        np.zeros((3, 2, 2)),
+                    ),
                 },
             ),
             pytest.raises(ValueError),
@@ -148,19 +243,20 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
                     "foo": np.arange(3),
                     "bar": ["x", "y"],
                     "id": np.arange(2),
+                    "model": np.arange(2),
                 },
                 data_vars={
                     "position": (
-                        ["foo", "bar", "id"],
-                        np.zeros((3, 2, 2)),
+                        ["foo", "bar", "id", "model"],
+                        np.zeros((3, 2, 2, 2)),
                     ),
                     "shape": (
-                        ["foo", "bar", "id"],
-                        np.zeros((3, 2, 2)),
+                        ["foo", "bar", "id", "model"],
+                        np.zeros((3, 2, 2, 2)),
                     ),
                     "confidence": (
-                        ["foo", "id"],
-                        np.zeros((3, 2)),
+                        ["foo", "id", "model"],
+                        np.zeros((3, 2, 2)),
                     ),
                 },
             ),
@@ -173,19 +269,20 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
                     "image_id": np.arange(3),
                     "space": np.arange(2),
                     "id": np.arange(2),
+                    "model": np.arange(2),
                 },
                 data_vars={
                     "position": (
-                        ["image_id", "space", "id"],
-                        np.zeros((3, 2, 2)),
+                        ["image_id", "space", "id", "model"],
+                        np.zeros((3, 2, 2, 2)),
                     ),
                     "shape": (
-                        ["image_id", "id"],
-                        np.zeros((3, 2)),
+                        ["image_id", "id", "model"],
+                        np.zeros((3, 2, 2)),
                     ),
                     "confidence": (
-                        ["image_id", "id"],
-                        np.zeros((3, 2)),
+                        ["image_id", "id", "model"],
+                        np.zeros((3, 2, 2)),
                     ),
                 },
             ),
@@ -197,42 +294,21 @@ def valid_bbox_detections_dataset_extra_vars_and_dims(
         ),
     ],
     ids=[
-        "valid_bbox_detections",
-        "valid_bbox_detections_extra_vars_and_dims",
-        "valid_bbox_detections_extra_dims_in_shape_var",
-        "invalid_bbox_detections_type",
-        "invalid_bbox_detections_dataset_missing_data_var",
-        "invalid_bbox_detections_missing_multiple_data_vars",
-        "invalid_bbox_detections_missing_dimension",
-        "invalid_bbox_detections_missing_multiple_dimensions",
-        "invalid_bbox_detections_missing_dimension_in_data_var",
+        "invalid_missing_dimension",
+        "invalid_missing_multiple_dimensions",
+        "invalid_missing_dimension_in_data_var",
     ],
 )
-def test_validator_bbox_detections_dataset(
-    sample_dataset: str | dict,
+def test_validator_bbox_detections_dataset_missing_dims(
+    validator: type[ValidBboxDetectionsDataset]
+    | type[ValidBboxDetectionsEnsembleDataset],
+    sample_dataset: xr.Dataset,
     expected_exception: pytest.raises,
     expected_error_message: str,
-    request: pytest.FixtureRequest,
 ):
-    """Test bbox annotations dataset validation in various input scenarios."""
-    # Get dataset to validate
-    if isinstance(sample_dataset, str):
-        dataset = request.getfixturevalue(sample_dataset)
-    else:
-        dataset = sample_dataset
-
     # Run validation and check exception
     with expected_exception as excinfo:
-        validator = ValidBboxDetectionsDataset(dataset=dataset)
-
+        _validator = validator(dataset=sample_dataset)
     if excinfo:
         error_msg = str(excinfo.value)
         assert error_msg in expected_error_message
-    else:
-        assert validator.dataset is dataset
-        assert validator.required_dims == {"image_id", "space", "id"}
-        assert validator.required_data_vars == {
-            "position": {"image_id", "space", "id"},
-            "shape": {"image_id", "space", "id"},
-            "confidence": {"image_id", "id"},
-        }

From 3aa7f4368c0760715335f9f6105fe2c7a543a622 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 17:48:43 +0000
Subject: [PATCH 34/39] Start test for utils

---
 .../test_detectors_ensembles/test_utils.py    | 66 +++++++++++++++++++
 1 file changed, 66 insertions(+)
 create mode 100644 tests/test_unit/test_detectors_ensembles/test_utils.py

diff --git a/tests/test_unit/test_detectors_ensembles/test_utils.py b/tests/test_unit/test_detectors_ensembles/test_utils.py
new file mode 100644
index 00000000..c88199af
--- /dev/null
+++ b/tests/test_unit/test_detectors_ensembles/test_utils.py
@@ -0,0 +1,66 @@
+import numpy as np
+import pytest
+
+from ethology.detectors.ensembles.utils import (
+    _centroid_shape_to_corners,
+    _corners_to_centroid_shape,
+    _get_padding_width,
+    _pad_to_max_first_dimension,
+)
+
+
+def test_get_padding_width():
+    pass
+
+
+@pytest.mark.parametrize(
+    "fill_value",
+    [
+        np.nan,
+        np.inf,
+        42
+    ],
+)
+def test_pad_to_max_first_dimension(fill_value):
+    """Test padding all arrays in list along first dimension."""
+    # Get max array length
+    list_arrays = [np.zeros((1, 2, 3)), np.zeros((10, 2, 3))]
+    max_array_length = max([arr.shape[0] for arr in list_arrays])
+
+    # Pad
+    list_arrays_padded = _pad_to_max_first_dimension(list_arrays, fill_value)
+
+    # Assert all same length
+    assert all(
+        [arr.shape[0] == max_array_length for arr in list_arrays_padded]
+    )
+    # Assert other dimensions stay the same
+    assert all(
+        [
+            arr.shape[1:] == arr_input.shape[1:]
+            for arr, arr_input in zip(
+                list_arrays_padded, list_arrays, strict=True
+            )
+        ]
+    )
+    # Assert padding value
+    assert all(
+        [
+            np.allclose(
+                arr[arr_input.shape[0]:],
+                np.full_like(arr[arr_input.shape[0]:], fill_value),
+                equal_nan=True,
+            )
+            for arr, arr_input in zip(
+                list_arrays_padded, list_arrays, strict=True
+            )
+        ]
+    )
+
+
+def test_centroid_shape_to_corners():
+    pass
+
+
+def test_corners_to_centroid_shape():
+    pass

From b18a3e578992c8aa408360c7e609511d5f488a88 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:08:32 +0000
Subject: [PATCH 35/39] Make utils private

---
 ethology/detectors/ensembles/fusion.py |  8 ++++----
 ethology/detectors/ensembles/models.py | 10 +++++-----
 ethology/detectors/ensembles/utils.py  | 24 +++++++++++++++++-------
 3 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 2b0801ac..3452e37a 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -12,8 +12,8 @@
 from tqdm import tqdm
 
 from ethology.detectors.ensembles.utils import (
-    centroid_shape_to_corners,
-    corners_to_centroid_shape,
+    _centroid_shape_to_corners,
+    _corners_to_centroid_shape,
 )
 from ethology.validators.detections import (
     ValidBboxDetectionsDataset,
@@ -251,7 +251,7 @@ def _preprocess_single_image_detections(
     """Prepare detections of an ensemble on a single image for fusion."""
     # Prepare boxes array
     # transform position and shape arrays to x1y1x2y normalised
-    x1y1, x2y2 = centroid_shape_to_corners(position, shape)
+    x1y1, x2y2 = _centroid_shape_to_corners(position, shape)
     bboxes_x1y1 = x1y1 / image_width_height[:, None, None]
     bboxes_x2y2 = x2y2 / image_width_height[:, None, None]
     bboxes_x1y1_x2y2_normalised = np.transpose(
@@ -371,7 +371,7 @@ def _parse_single_image_detections_as_dataarrays(
         id_array = np.arange(n_detections)
 
     # Extract bbox centre and shape
-    centroid, shape = corners_to_centroid_shape(
+    centroid, shape = _corners_to_centroid_shape(
         x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4]
     )
 
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index 1830fc71..ad6fe51d 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -13,8 +13,8 @@
 from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import (
-    corners_to_centroid_shape,
-    pad_to_max_first_dimension,
+    _corners_to_centroid_shape,
+    _pad_to_max_first_dimension,
 )
 from ethology.validators.detections import ValidBboxDetectionsEnsembleDataset
 from ethology.validators.utils import _check_output
@@ -173,11 +173,11 @@ def format_predictions(
             ky: [] for ky in output_per_sample
         }
         for ky in output_per_sample_padded:
-            output_per_sample_padded[ky] = pad_to_max_first_dimension(
+            output_per_sample_padded[ky] = _pad_to_max_first_dimension(
                 [
                     # pad across models
                     np.stack(
-                        pad_to_max_first_dimension(
+                        _pad_to_max_first_dimension(
                             output_one_sample, fill_value[ky]
                         ),
                         axis=-1,
@@ -199,7 +199,7 @@ def format_predictions(
         # Compute centroid and shape arrays
         # centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4])
         # shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2]
-        centroid_array, shape_array = corners_to_centroid_shape(
+        centroid_array, shape_array = _corners_to_centroid_shape(
             bboxes_array[:, 0:2], bboxes_array[:, 2:4]
         )
 
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
index 03ff3b2d..ab1a757c 100644
--- a/ethology/detectors/ensembles/utils.py
+++ b/ethology/detectors/ensembles/utils.py
@@ -3,20 +3,20 @@
 import numpy as np
 
 
-def get_padding_width(array, max_n):
+def _get_padding_width(array, max_n):
     """Get pad width for array to max_n detections in the first dimension."""
     pad_width = array.ndim * [(0, 0)]
     pad_width[0] = (0, max_n - array.shape[0])  # before, after
     return pad_width
 
 
-def pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
+def _pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
     """Pad arrays in list to maximum size of their first dimension."""
     max_n_detections = max(array.shape[0] for array in list_arrays)
     list_arrays_padded = [
         np.pad(
             arr,
-            get_padding_width(arr, max_n_detections),
+            _get_padding_width(arr, max_n_detections),
             mode="constant",
             constant_values=fill_value,
         )
@@ -25,8 +25,15 @@ def pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
     return list_arrays_padded
 
 
-def centroid_shape_to_corners(position, shape):
-    """Convert centroid and shape arrays to x1y1, x2y2 corner arrays."""
+def _centroid_shape_to_corners(position, shape):
+    """Convert centroid and shape arrays to x1y1, x2y2 corner arrays.
+    
+    x1y1 is the top left corner (min x-coordinate, min y-coordinate), 
+    x2y2 is the bottom right corner (max x-coordinate, max y-coordinate) 
+    of the bounding box.
+
+    Space dimension is assumed to be the second dimension.
+    """
     half_shape = shape / 2
     return (
         position - half_shape,  # x1y1
@@ -34,8 +41,11 @@ def centroid_shape_to_corners(position, shape):
     )
 
 
-def corners_to_centroid_shape(x1y1, x2y2):
-    """Convert x1y1, x2y2 corner arrays to centroid and shape arrays."""
+def _corners_to_centroid_shape(x1y1, x2y2):
+    """Convert x1y1, x2y2 corner arrays to centroid and shape arrays.
+    
+    Space dimension is assumed to be the second dimension.
+    """
     return (
         0.5 * (x1y1 + x2y2),  # centroid
         x2y2 - x1y1,  # shape

From 57333f676f86fcb618606ebd31424961e8bbf86a Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:08:49 +0000
Subject: [PATCH 36/39] add basic tests for utils

---
 tests/test_unit/test_datasets/__init__.py     |  0
 .../test_detectors_ensembles/__init__.py      |  0
 .../test_detectors_ensembles/test_utils.py    | 77 +++++++++++++++----
 tests/test_unit/test_validators/__init__.py   |  0
 4 files changed, 64 insertions(+), 13 deletions(-)
 create mode 100644 tests/test_unit/test_datasets/__init__.py
 create mode 100644 tests/test_unit/test_detectors_ensembles/__init__.py
 create mode 100644 tests/test_unit/test_validators/__init__.py

diff --git a/tests/test_unit/test_datasets/__init__.py b/tests/test_unit/test_datasets/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/test_detectors_ensembles/__init__.py b/tests/test_unit/test_detectors_ensembles/__init__.py
new file mode 100644
index 00000000..e69de29b
diff --git a/tests/test_unit/test_detectors_ensembles/test_utils.py b/tests/test_unit/test_detectors_ensembles/test_utils.py
index c88199af..7835fcbd 100644
--- a/tests/test_unit/test_detectors_ensembles/test_utils.py
+++ b/tests/test_unit/test_detectors_ensembles/test_utils.py
@@ -9,17 +9,40 @@
 )
 
 
-def test_get_padding_width():
-    pass
+@pytest.mark.parametrize(
+    "array, target_first_dim, expected_pad_width_first_dim",
+    [
+        (
+            np.zeros((3,)),
+            5,
+            (0, 2),
+        ),  # 1D array
+        (
+            np.zeros((1, 2, 3)),
+            4,
+            (0, 3),
+        ),  # 3D array
+        (
+            np.zeros((10, 2, 3)),
+            10,
+            (0, 0),
+        ),  # No padding needed
+    ],
+)
+def test_get_padding_width(
+    array, target_first_dim, expected_pad_width_first_dim
+):
+    """Test getting padding width for arrays of different dimensions."""
+    pad_width = _get_padding_width(array, target_first_dim)
+
+    assert len(pad_width) == array.ndim
+    assert pad_width[0] == expected_pad_width_first_dim
+    assert all(pw == (0, 0) for pw in pad_width[1:])
 
 
 @pytest.mark.parametrize(
     "fill_value",
-    [
-        np.nan,
-        np.inf,
-        42
-    ],
+    [np.nan, np.inf, 42],
 )
 def test_pad_to_max_first_dimension(fill_value):
     """Test padding all arrays in list along first dimension."""
@@ -47,8 +70,8 @@ def test_pad_to_max_first_dimension(fill_value):
     assert all(
         [
             np.allclose(
-                arr[arr_input.shape[0]:],
-                np.full_like(arr[arr_input.shape[0]:], fill_value),
+                arr[arr_input.shape[0] :],
+                np.full_like(arr[arr_input.shape[0] :], fill_value),
                 equal_nan=True,
             )
             for arr, arr_input in zip(
@@ -58,9 +81,37 @@ def test_pad_to_max_first_dimension(fill_value):
     )
 
 
-def test_centroid_shape_to_corners():
-    pass
+@pytest.mark.parametrize(
+    "position, shape, expected_x1y1, expected_x2y2",
+    [
+        (
+            np.zeros((1, 2)),
+            np.array([[4, 2]]),
+            np.array([[-2, -1]]),
+            np.array([[2, 1]]),
+        )
+    ],
+)
+def test_centroid_shape_to_corners(
+    position, shape, expected_x1y1, expected_x2y2
+):
+    x1y1, x2y2 = _centroid_shape_to_corners(position, shape)
+    np.testing.assert_array_equal(x1y1, expected_x1y1)
+    np.testing.assert_array_equal(x2y2, expected_x2y2)
 
 
-def test_corners_to_centroid_shape():
-    pass
+@pytest.mark.parametrize(
+    "x1y1, x2y2, expected_position, expected_shape",
+    [
+        (
+            np.zeros((1, 2)),
+            np.ones((1, 2)),
+            np.array([[0.5, 0.5]]),
+            np.array([[1, 1]]),
+        )
+    ],
+)
+def test_corners_to_centroid_shape(x1y1, x2y2, expected_position, expected_shape):
+    position, shape = _corners_to_centroid_shape(x1y1, x2y2)
+    np.testing.assert_array_equal(position, expected_position)
+    np.testing.assert_array_equal(shape, expected_shape)
diff --git a/tests/test_unit/test_validators/__init__.py b/tests/test_unit/test_validators/__init__.py
new file mode 100644
index 00000000..e69de29b

From 1c0887529cba4c255974b3802a16fcbffb581394 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:22:48 +0000
Subject: [PATCH 37/39] pre-commit fixes

---
 ethology/detectors/ensembles/fusion.py        | 27 ++++++++++---------
 ethology/detectors/ensembles/models.py        |  1 -
 ethology/detectors/ensembles/utils.py         |  8 +++---
 .../test_detectors_ensembles/test_utils.py    |  4 ++-
 4 files changed, 21 insertions(+), 19 deletions(-)

diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py
index 3452e37a..d0dbfe71 100644
--- a/ethology/detectors/ensembles/fusion.py
+++ b/ethology/detectors/ensembles/fusion.py
@@ -142,7 +142,7 @@ def _postprocess_multi_image_fused_arrays(
     """
     # Transpose results from list-of-tuples to tuple-of-lists
     da_names = ("position", "shape", "confidence", "label")
-    da_lists = zip(*results_per_img_id)
+    da_lists = zip(*results_per_img_id, strict=True)
 
     # Concatenate lists of dataarrays along image_id dimension and
     # remove extra padding in "id" dimension
@@ -182,8 +182,8 @@ def _validate_image_shape(image_shape) -> np.ndarray:
 def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int:
     """Get upper bound for maximum number of boxes per image after fusion.
 
-    We assume no detections are fused and all images have as many detections as the maximum
-    number of non-nan detections per image.
+    We assume no detections are fused and all images have as many
+    detections as the maximum number of non-nan detections per image.
     """
     detections_w_non_nan_position = (
         ensemble_detections_ds.position.notnull().all(dim="space")
@@ -209,7 +209,7 @@ def _fuse_single_image_detections(
     max_n_detections: int,
     **fusion_kwargs: Unpack[_TypeFusionMethodKwargs],  #  method-only kwargs
 ) -> TupleFourDataArrays:
-    """Fuse detections across models for a single image using selected method."""
+    """Fuse detections for a single image with selected method."""
     # Prepare single image arrays for fusion
     list_bboxes_per_model, list_confidence_per_model, list_label_per_model = (
         _preprocess_single_image_detections(
@@ -284,20 +284,21 @@ def _preprocess_single_image_detections(
     ]
     return (
         _chop_end_of_array(
-            list_arrays_per_model, list_non_nan_bboxes_per_model
-        )
-        for list_arrays_per_model in [
-            list_x1y1_x2y2_norm_per_model,
-            list_confidence_per_model,
-            list_label_per_model,
-        ]
+            list_x1y1_x2y2_norm_per_model, list_non_nan_bboxes_per_model
+        ),
+        _chop_end_of_array(
+            list_confidence_per_model, list_non_nan_bboxes_per_model
+        ),
+        _chop_end_of_array(
+            list_label_per_model, list_non_nan_bboxes_per_model
+        ),
     )
 
 
 def _chop_end_of_array(
     list_arrays: list[np.ndarray], list_end_lengths: list[int]
 ) -> list[np.ndarray]:
-    """Chop end of arrays in list to the desired length along the first dimension."""
+    """Chop end of arrays in list to desired length along first dimension."""
     return [
         arr[:n] for arr, n in zip(list_arrays, list_end_lengths, strict=True)
     ]
@@ -346,7 +347,7 @@ def _postprocess_single_image_detections(
 def _remove_nan_and_pad_to_max(
     input_array, mask_non_nan_rows, max_n_detections, fill_value=np.nan
 ):
-    """Remove non-nan from input array and pad with nans, all along first dimension."""
+    """Remove nan rows from input array and pad, all along first dimension."""
     # Initialise array with nans
     padded_array = np.full(
         (max_n_detections, *input_array.shape[1:]),
diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py
index ad6fe51d..ab1eada4 100644
--- a/ethology/detectors/ensembles/models.py
+++ b/ethology/detectors/ensembles/models.py
@@ -9,7 +9,6 @@
 import xarray as xr
 import yaml
 from lightning import LightningModule
-from torch.nn.parallel import parallel_apply
 from torchvision.models import detection, get_model, list_models
 
 from ethology.detectors.ensembles.utils import (
diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py
index ab1a757c..4a686d44 100644
--- a/ethology/detectors/ensembles/utils.py
+++ b/ethology/detectors/ensembles/utils.py
@@ -27,9 +27,9 @@ def _pad_to_max_first_dimension(list_arrays, fill_value=np.nan):
 
 def _centroid_shape_to_corners(position, shape):
     """Convert centroid and shape arrays to x1y1, x2y2 corner arrays.
-    
-    x1y1 is the top left corner (min x-coordinate, min y-coordinate), 
-    x2y2 is the bottom right corner (max x-coordinate, max y-coordinate) 
+
+    x1y1 is the top left corner (min x-coordinate, min y-coordinate),
+    x2y2 is the bottom right corner (max x-coordinate, max y-coordinate)
     of the bounding box.
 
     Space dimension is assumed to be the second dimension.
@@ -43,7 +43,7 @@ def _centroid_shape_to_corners(position, shape):
 
 def _corners_to_centroid_shape(x1y1, x2y2):
     """Convert x1y1, x2y2 corner arrays to centroid and shape arrays.
-    
+
     Space dimension is assumed to be the second dimension.
     """
     return (
diff --git a/tests/test_unit/test_detectors_ensembles/test_utils.py b/tests/test_unit/test_detectors_ensembles/test_utils.py
index 7835fcbd..fcd1a54a 100644
--- a/tests/test_unit/test_detectors_ensembles/test_utils.py
+++ b/tests/test_unit/test_detectors_ensembles/test_utils.py
@@ -111,7 +111,9 @@ def test_centroid_shape_to_corners(
         )
     ],
 )
-def test_corners_to_centroid_shape(x1y1, x2y2, expected_position, expected_shape):
+def test_corners_to_centroid_shape(
+    x1y1, x2y2, expected_position, expected_shape
+):
     position, shape = _corners_to_centroid_shape(x1y1, x2y2)
     np.testing.assert_array_equal(position, expected_position)
     np.testing.assert_array_equal(shape, expected_shape)

From 5399fef73580b9686d9b36bd71eb37a653d5e196 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:29:05 +0000
Subject: [PATCH 38/39] Remove example notebook

---
 examples/ensemble_of_detectors.py | 333 ------------------------------
 1 file changed, 333 deletions(-)
 delete mode 100644 examples/ensemble_of_detectors.py

diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py
deleted file mode 100644
index 86911633..00000000
--- a/examples/ensemble_of_detectors.py
+++ /dev/null
@@ -1,333 +0,0 @@
-"""Evaluating ensemble of trained detectors."""
-# %%
-# imports
-
-from pathlib import Path
-
-import numpy as np
-import torch
-import torchvision.transforms.v2 as transforms
-import xarray as xr
-import yaml
-from lightning import Trainer
-from matplotlib import pyplot as plt
-from torch.utils.data import DataLoader
-from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2
-
-from ethology.detectors.ensembles.fusion import fuse_detections
-from ethology.detectors.ensembles.models import EnsembleDetector
-from ethology.detectors.evaluate import compute_precision_recall_ds
-from ethology.io.annotations import load_bboxes
-
-# %%
-# %matplotlib widget
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-
-
-# Helper functions
-def create_coco_dataset(
-    images_dir: str | Path,
-    annotations_file: str | Path,
-    composed_transform: transforms.Compose,
-) -> CocoDetection:
-    """Create a COCO dataset for object detection.
-
-    Note: transforms are applied to the full dataset. If the dataset
-    is later split, all splits will have the same transforms.
-    """
-    dataset_coco = CocoDetection(
-        root=images_dir,
-        annFile=annotations_file,
-        transforms=composed_transform,
-    )
-
-    # wrap dataset for transforms v2
-    dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco)
-
-    return dataset_transformed
-
-
-def collate_fn_varying_n_bboxes(batch: tuple) -> tuple:
-    """Collate function for dataloader with varying number of bounding boxes.
-
-    A custom function is needed for detection
-    because the number of bounding boxes varies
-    between images of the same batch.
-    See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop
-
-    Parameters
-    ----------
-    batch : tuple
-        a tuple of 2 tuples, the first one holding all images in the batch,
-        and the second one holding the corresponding annotations.
-
-    Returns
-    -------
-    tuple
-        a tuple of length = batch size, made up of (image, annotations)
-        tuples.
-
-    """
-    return tuple(zip(*batch, strict=True))
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Input data
-
-dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full")
-images_dir = dataset_dir / "frames"
-annotations_dir = dataset_dir / "annotations"
-annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json"
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Define a dataloader
-# Define transforms for inference
-inference_transforms = transforms.Compose(
-    [
-        transforms.ToImage(),
-        transforms.ToDtype(torch.float32, scale=True),
-    ]
-)
-
-# Create COCO dataset
-# TODO: convert from ethology detections dataset to COCO dataset
-# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
-dataset_coco = create_coco_dataset(
-    images_dir=Path(dataset_dir) / "frames",
-    annotations_file=annotations_file_path,
-    composed_transform=inference_transforms,
-)
-
-# dataloader
-dataloader = DataLoader(
-    dataset_coco,
-    batch_size=12,  # 12,
-    shuffle=False,
-    num_workers=8,  # 4
-    collate_fn=collate_fn_varying_n_bboxes,
-    persistent_workers=True,
-    # pin_memory=True,  # <-- Faster CPU->GPU transfer
-    # because we guarantee a physical address for the data
-    # in memory, so we can use DMA that directly takes it to
-    # the GPU
-    # prefetch_factor=4,  # <-- Prefetch more batches
-    # multiprocessing_context="fork"
-    # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available()
-    # else None,  # see https://github.com/pytorch/pytorch/issues/87688
-)
-
-# %%
-# TODO: dataloader to ethology detections dataset
-gt_bboxes_ds = load_bboxes.from_files(
-    annotations_file_path, format="COCO", images_dirs=images_dir
-)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Define a YAML config file for the ensemble of trained detectors
-experiment_ID = "617393114420881798"
-ml_runs_experiment_dir = (
-    Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID
-)
-last_ckpt = Path("checkpoints") / "last.ckpt"
-
-config = {
-    "models": {
-        "model_class": "fasterrcnn_resnet50_fpn_v2",
-        # imported from torchvision.models.detection
-        "model_kwargs": {
-            "num_classes": 2,
-            "weights": None,  # null in YAML becomes None in Python
-            "weights_backbone": None,
-        },
-        "checkpoints": [
-            str(
-                ml_runs_experiment_dir
-                / "f348d9d196934073bece1b877cbc4d38"
-                / last_ckpt
-            ),  # above_0th
-            str(
-                ml_runs_experiment_dir
-                / "879d2f77e2b24adcb06b87d2fede6a04"
-                / last_ckpt
-            ),  # above_1st
-            str(
-                ml_runs_experiment_dir
-                / "75583ec227e3444ab692b99c64795325"
-                / last_ckpt
-            ),  # above_5th
-            str(
-                ml_runs_experiment_dir
-                / "4acc37206b1e4f679d535c837bee2c2f"
-                / last_ckpt
-            ),  # above_10th
-            str(
-                ml_runs_experiment_dir
-                / "fdcf88fcbcc84fbeb94b45ca6b6f8914"
-                / last_ckpt
-            ),  # above_25th
-            str(
-                ml_runs_experiment_dir
-                / "daa05ded0ea047388c9134bf044061c5"
-                / last_ckpt
-            ),  # above_50th
-        ],
-    },
-    "fusion": {
-        "method": "weighted_boxes_fusion",
-        # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted"
-        "method_kwargs": {
-            # arguments as in ensemble_boxes.weighted_boxes_fusion
-            "iou_thr": 0.5,  # iou threshold for the ensemble
-            "skip_box_thr": 0.0001,
-        },
-        "n_jobs": -1,  # workers for joblib.Parallel,
-        # n_workers should be <= number of CPU cores
-        # follows joblib n_jobs
-        # if -1: all are used
-        # if None: same as 1
-        # "confidence_threshold_post_fusion": 0.0,
-        "max_n_detections": 300,
-    },
-}
-config_file = "ensemble_of_detectors.yaml"
-with open(config_file, "w") as f:
-    yaml.dump(config, f, sort_keys=False)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Load the ensemble of detectors
-ensemble_detector = EnsembleDetector(config_file)
-print(f"Ensemble detector is on device: {ensemble_detector.device}")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Run the ensemble of detectors on a dataset
-# Use Trainer for inference (this sets the device flexibly)
-
-# With multiple devices:
-# Lightning handles the "main" device (so still device=1), 
-# while code internally distributes models across GPUs using parallel_apply.
-trainer = Trainer(
-    accelerator="gpu",
-    devices=1,
-    logger=False,
-    precision="16-mixed",  # --- results change
-    # strategy = 'ddp' ?
-)
-predictions = trainer.predict(ensemble_detector, dataloader)
-
-
-# %%
-# Format predictions as ethology detections dataset and add attrs
-# TODO: think about syntax of format_predictions (should it be instance or
-# static method instead?)
-ensemble_detections_ds = ensemble_detector.format_predictions(
-    predictions=predictions,
-    attrs=gt_bboxes_ds.attrs
-)
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Fuse detections across models using selected method
-image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1]
-ensemble_detections_ds.attrs["image_shape"] = image_width_height
-config_fusion: dict = config["fusion"]
-
-
-fused_detections_ds = fuse_detections(
-    ensemble_detections_ds,
-    fusion_method=config_fusion["method"],
-    fusion_method_kwargs=config_fusion["method_kwargs"],
-    # n_workers=config_fusion.get("n_jobs", 1),
-    # max_n_detections=config_fusion["max_n_detections"],
-    # should be larger than expected maximum number of detections after fusion
-    # ---- method kwargs ----
-)
-
-# %%
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Fuse detections across models with NMS
-
-# fused_detections_nms_ds = fuse_ensemble_detections(
-#     ensemble_detections_ds,
-#     fusion_method="soft_nms",
-#     fusion_method_kwargs={
-#         "iou_thr": config_fusion["method_kwargs"]["iou_thr"],
-#         "sigma": 0.5,
-#         "thresh": 0.001,
-#     },
-#     max_n_detections=500,
-# )
-
-# fused_detections_ds = fused_detections_nms_ds
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Remove low confidence detections
-confidence_threshold_post_fusion = 0.4
-fused_detections_ds_ = fused_detections_ds.where(
-    fused_detections_ds.confidence >= confidence_threshold_post_fusion
-)
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Evaluate the ensemble model
-# - load ground truth
-# - compute metrics
-
-# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO")
-
-iou_threshold_tp = 0.25
-fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds(
-    pred_bboxes_ds=fused_detections_ds_,
-    gt_bboxes_ds=gt_bboxes_ds,
-    iou_threshold=iou_threshold_tp,
-)
-
-# All models on full August dataset, without removing low
-# confidence detections:
-# confidence_threshold_post_fusion = 0.0
-# Precision: 0.5920
-# Recall: 0.8455
-# ---
-# confidence_threshold_post_fusion = 0.4
-# Precision: 0.8339
-# Recall: 0.7177
-# ---
-# confidence_threshold_post_fusion = 0.5
-# Precision: 0.8714
-# Recall: 0.6624
-# ---
-# confidence threshold post fusion: 0.40 AND mixed precision in trainer
-# Precision: 0.8336
-# Recall: 0.7162
-
-print(
-    "Ensemble model with confidence threshold post fusion: "
-    f"{confidence_threshold_post_fusion:.2f}"
-)
-print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}")
-print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}")
-
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Evaluate single models
-list_detections_ds_eval = []
-for k in range(ensemble_detections_ds.sizes["model"]):
-    # filter low confidence detections (for a fairer comparison)
-    detections_one_model = ensemble_detections_ds.where(
-        ensemble_detections_ds.confidence >= confidence_threshold_post_fusion
-    ).sel(model=k)
-
-    # evaluate
-    detections_ds, _ = compute_precision_recall_ds(
-        pred_bboxes_ds=detections_one_model,
-        gt_bboxes_ds=gt_bboxes_ds,
-        iou_threshold=iou_threshold_tp,
-    )
-    list_detections_ds_eval.append(detections_ds)
-
-    print(f"Model: {k}")
-    print(f"Precision: {detections_ds.precision.mean().values:.4f}")
-    print(f"Recall: {detections_ds.recall.mean().values:.4f}")
-    print("--------------------------------")
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

From b34680d09abc2d56f8a421e41562ec0589136f1d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Thu, 4 Dec 2025 18:43:32 +0000
Subject: [PATCH 39/39] Docs fixes: add Sphinx extensions and intersphinx targets

---
 docs/requirements.txt | 1 +
 docs/source/conf.py   | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/docs/requirements.txt b/docs/requirements.txt
index cb55a343..b270cde9 100644
--- a/docs/requirements.txt
+++ b/docs/requirements.txt
@@ -9,4 +9,5 @@ sphinx-autodoc-typehints
 sphinx-design
 sphinx-gallery
 sphinx-notfound-page
+sphinx-paramlinks
 sphinx-sitemap
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 6db7d86d..d906807e 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -37,6 +37,7 @@
     "sphinx.ext.autosummary",
     "sphinx.ext.viewcode",
     "sphinx.ext.intersphinx",
+    "sphinx.ext.doctest",  # for lightning docstrings
     "myst_parser",
     "nbsphinx",
     "notfound.extension",
@@ -44,6 +45,7 @@
     "sphinx_gallery.gen_gallery",
     "sphinx_sitemap",
     "sphinx.ext.autosectionlabel",
+    "sphinx_paramlinks",
 ]
 
 # Configure the myst parser to enable cool markdown features
@@ -186,6 +188,8 @@
         "https://python-jsonschema.readthedocs.io/en/stable/",
         None,
     ),
+    "torch": ("https://pytorch.org/docs/stable/", None),
+    "pytorch_lightning": ("https://lightning.ai/docs/pytorch/stable/", None),
 }