From d36c727fc05b0afb2b28f03d4e627f6460591ca3 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 13 Nov 2025 20:58:25 +0000 Subject: [PATCH 01/39] Example and lightning model --- ethology/detectors/ensembles/__init__.py | 0 ethology/detectors/ensembles/fusion.py | 40 ++++++ ethology/detectors/ensembles/models.py | 164 +++++++++++++++++++++ ethology/detectors/ensembles/utils.py | 107 ++++++++++++++ examples/ensemble_of_detectors.py | 172 +++++++++++++++++++++++ pyproject.toml | 8 +- 6 files changed, 490 insertions(+), 1 deletion(-) create mode 100644 ethology/detectors/ensembles/__init__.py create mode 100644 ethology/detectors/ensembles/fusion.py create mode 100644 ethology/detectors/ensembles/models.py create mode 100644 ethology/detectors/ensembles/utils.py create mode 100644 examples/ensemble_of_detectors.py diff --git a/ethology/detectors/ensembles/__init__.py b/ethology/detectors/ensembles/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py new file mode 100644 index 00000000..58521a69 --- /dev/null +++ b/ethology/detectors/ensembles/fusion.py @@ -0,0 +1,40 @@ +"""Wrappers around ensemble-boxes fusion functions.""" +import numpy as np +from ensemble_boxes import weighted_boxes_fusion + + +def weighted_boxes_fusion_in_pixels( + image_height_width: tuple[int, int], + boxes_list: list[np.ndarray], + scores_list: list[np.ndarray], + labels_list: list[np.ndarray], + iou_thr: float, + skip_box_thr: float, +): + """Fuse bboxes for a single image and return in pixels.""" + # Normalize boxes using image shape + image_height, image_width = image_height_width + boxes_list = [ + boxes + / np.array([image_width, image_height, image_width, image_height]) + if len(boxes) > 0 + else boxes + for boxes in boxes_list + ] + + # Apply WBF + fused_boxes, fused_scores, fused_labels = weighted_boxes_fusion( + boxes_list, + scores_list, + 
labels_list, + iou_thr=iou_thr, + skip_box_thr=skip_box_thr, + ) + + # Denormalize boxes + # Format of returned bboxes is x1y1x2y2 in pixels like fasterrcnn + fused_boxes = fused_boxes * np.array( + [image_width, image_height, image_width, image_height] + ) + + return fused_boxes, fused_scores, fused_labels \ No newline at end of file diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py new file mode 100644 index 00000000..a42ce1ef --- /dev/null +++ b/ethology/detectors/ensembles/models.py @@ -0,0 +1,164 @@ +"""Lightning Modules for ensembles of detectors.""" + +from itertools import chain +from pathlib import Path + +import numpy as np +import torch +import torch.nn as nn +import torchvision.models.detection as detection_models +import xarray as xr +import yaml +from joblib import Parallel, delayed +from lightning import LightningModule + +from ethology.detectors.ensembles.fusion import weighted_boxes_fusion_in_pixels +from ethology.detectors.ensembles.utils import ( + arrays_to_ds_variables, + pad_to_max_first_dimension, +) + + +class EnsembleDetector(LightningModule): + """Ensemble of (trained) detectors for inference. + + Attributes + ---------- + config_file: str + Path to the YAML config file. 
+ """ + + def __init__(self, config_file: str | Path): + super().__init__() + + # Load config + self.config_file = Path(config_file) + with open(self.config_file) as f: + self.config = yaml.safe_load(f) + + # Load list of models (nn.ModuleList) + self.list_models = self.load_models() + + def load_models(self) -> nn.ModuleList: + """Load models from checkpoints.""" + models_config = self.config["models"] + model_class = getattr(detection_models, models_config["model_class"]) + + list_models = [] + for checkpoint_path in models_config["checkpoints"]: + # Get model architecture and weights + model = model_class(**models_config["model_kwargs"]) + checkpoint = torch.load(checkpoint_path, map_location=self.device) + state_dict = checkpoint["state_dict"] + + # Load state dict into model + # PyTorch Lightning saves the model with a "model." + # prefix in the state_dict keys if you defined self.model + # in your LightningModule - we remove the prefix here. + if any(key.startswith("model.") for key in state_dict): + model_state_dict = { + key.replace("model.", "", 1): value + for key, value in state_dict.items() + if key.startswith("model.") + } + else: + model_state_dict = state_dict + model.load_state_dict(model_state_dict) + + # Append to list + list_models.append(model) + return nn.ModuleList(list_models) + + def fuse_bboxes(self, images_batch, predictions_per_model: list[dict]): + """Fuse bboxes per sample in CPU in parallel.""" + # Fuse bboxes per sample in CPU in parallel + # Dispatch fusion tasks to executor (non-blocking) + # if self.config["fusion"]["method"] == "wbf" + + # n_jobs = -1 means Use ALL available CPU cores + # n_jobs = -2 means Use ALL available CPU cores except one + n_jobs = self.config["fusion"].get("n_jobs", -1) + + # Parallel WBF fusion + batch_size = len(images_batch) + results_batch = Parallel(n_jobs=n_jobs)( + delayed(weighted_boxes_fusion_in_pixels)( + images_batch[i].shape[-2:], # image height and width + [ + preds[i]["boxes"].cpu().numpy() + 
for preds in predictions_per_model + ], # same image across all models + [ + preds[i]["scores"].cpu().numpy() + for preds in predictions_per_model + ], + [ + preds[i]["labels"].cpu().numpy() + for preds in predictions_per_model + ], + self.config["fusion"]["iou_th_ensemble"], + self.config["fusion"]["skip_box_th"], + ) + for i in range(batch_size) + ) # list [(bboxes, scores, labels) * batch_size] + + fused_boxes_batch, fused_scores_batch, fused_labels_batch = ( + zip(*results_batch, strict=True) if results_batch else ([], [], []) + ) + + return fused_boxes_batch, fused_scores_batch, fused_labels_batch + + def predict_step(self, batch, batch_idx): + """Predict step for a single batch.""" + # ------------------------------ + # Run all models in ensemble in GPU + # TODO: can I vectorize this? + # https://docs.pytorch.org/tutorials/intermediate/ensembling.html + images_batch, _annotations_batch = batch + predictions_per_model = [ + model(images_batch) for model in self.list_models + ] # [num_models][batch_size] + + # ------------------------------ + # Fuse bboxes per sample in CPU in parallel + fused_boxes_batch, fused_scores_batch, fused_labels_batch = ( + self.fuse_bboxes(images_batch, predictions_per_model) + ) + + return fused_boxes_batch, fused_scores_batch, fused_labels_batch + + @staticmethod + def format_predictions(raw_predictions): + """Format as ethology detections dataset.""" + # Unzip data per batch + ( + fused_boxes_per_batch, + fused_scores_per_batch, + fused_labels_per_batch, + ) = zip(*raw_predictions, strict=True) # [n_batches][batch_size] + + # Flatten across all batches + fused_boxes = list(chain.from_iterable(fused_boxes_per_batch)) + fused_scores = list(chain.from_iterable(fused_scores_per_batch)) + fused_labels = list(chain.from_iterable(fused_labels_per_batch)) + + # Pad arrays to max n of detections per image + fused_boxes_padded = pad_to_max_first_dimension(fused_boxes) + fused_scores_padded = pad_to_max_first_dimension(fused_scores) + 
fused_labels_padded = pad_to_max_first_dimension(fused_labels) + + # Stack into arrays + bboxes_array = np.transpose( + np.stack(fused_boxes_padded), (0, -1, 1) + ) # image_id, space-4, id + scores_array = np.stack(fused_scores_padded) + labels_array = np.stack(fused_labels_padded) + + # ------------------------------ + # Return as ethology detections dataset + ds_variables = arrays_to_ds_variables( + bboxes_array, scores_array, labels_array + ) + detections_ds = xr.Dataset(data_vars=ds_variables) + + return detections_ds diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py new file mode 100644 index 00000000..f39956ed --- /dev/null +++ b/ethology/detectors/ensembles/utils.py @@ -0,0 +1,107 @@ +"""Utility functions for reshaping outputs of ensembles of detectors.""" +import numpy as np +import xarray as xr + + +def get_padding_width(array, max_n): + """Get pad width for array to max_n detections in the first dimension.""" + pad_width = array.ndim * [(0, 0)] + pad_width[0] = (0, max_n - array.shape[0]) # before, after + return pad_width + + +def pad_to_max_first_dimension(list_arrays): + """Pad arrays to maximum number across all arrays in the first dimension.""" + max_n_detections = max(array.shape[0] for array in list_arrays) + list_arrays_padded = [ + np.pad( + arr, + get_padding_width(arr, max_n_detections), + mode="constant", + constant_values=np.nan, + ) + for arr in list_arrays + ] + return list_arrays_padded + + +def arrays_to_ds_variables( + bboxes_x1y1x2y2_array: np.ndarray, + scores_array: np.ndarray, + labels_array: np.ndarray, + id_array: np.ndarray | None = None, +) -> dict[str, xr.DataArray]: + """Convert arrays to dictionary of dataset variables. + + Parameters + ---------- + bboxes_x1y1x2y2_array: np.ndarray + Array of bounding box coordinates with shape + [Nimages, 4, Nmax_detections], in format x1y1x2y2 in units of pixels. + Nmax_detections is the maximum number of detections per image. 
+ scores_array: np.ndarray + Array of shape [Nimages, Nmax_detections] + labels_array: np.ndarray + Array of shape [Nimages, Nmax_detections] + id_array: np.ndarray | None, optional + Array of shape [Nmax_detections]. If None, will be set to + range(Nmax_detections). + """ + n_images = bboxes_x1y1x2y2_array.shape[0] + n_max_detections = bboxes_x1y1x2y2_array.shape[-1] + if id_array is None: + id_array = np.arange(n_max_detections) + + # centroid dataarray (x, y) + centroid_da = xr.DataArray( + data=0.5 + * ( + bboxes_x1y1x2y2_array[:, 0:2, :] + bboxes_x1y1x2y2_array[:, 2:4, :] + ), + dims=["image_id", "space", "id"], + coords={ + "image_id": np.arange(n_images), + "space": ["x", "y"], + "id": id_array, + }, + ) + + # shape dataarray (width, height) + shape_da = xr.DataArray( + data=( + bboxes_x1y1x2y2_array[:, 2:4, :] - bboxes_x1y1x2y2_array[:, 0:2, :] + ), + dims=["image_id", "space", "id"], + coords={ + "image_id": np.arange(n_images), + "space": ["x", "y"], + "id": id_array, + }, + ) + + # confidence dataarray + confidence_da = xr.DataArray( + data=scores_array, + dims=["image_id", "id"], + coords={ + "image_id": np.arange(n_images), + "id": id_array, + }, + ) + + # label dataarray + label_da = xr.DataArray( + data=labels_array, + dims=["image_id", "id"], + coords={ + "image_id": np.arange(n_images), + "id": id_array, + }, + ) + + return { + "position": centroid_da, + "shape": shape_da, + "confidence": confidence_da, + "label": label_da, + } diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py new file mode 100644 index 00000000..564b8ca5 --- /dev/null +++ b/examples/ensemble_of_detectors.py @@ -0,0 +1,172 @@ +# %% +# imports + +from pathlib import Path + +import torch +import torchvision.transforms.v2 as transforms +import yaml +from lightning import Trainer +from torch.utils.data import DataLoader +from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 + +from ethology.detectors.ensembles.models import 
EnsembleDetector + +# from ethology.detectors.evaluate import compute_precision_recall_ds +# from ethology.io.annotations import load_bboxes + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +# Helper functions +def create_coco_dataset( + images_dir: str | Path, + annotations_file: str | Path, + composed_transform: transforms.Compose, +) -> CocoDetection: + """Create a COCO dataset for object detection. + + Note: transforms are applied to the full dataset. If the dataset + is later split, all splits will have the same transforms. + """ + dataset_coco = CocoDetection( + root=images_dir, + annFile=annotations_file, + transforms=composed_transform, + ) + + # wrap dataset for transforms v2 + dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco) + + return dataset_transformed + + +def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: + """Collate function for dataloader with varying number of bounding boxes. + + A custom function is needed for detection + because the number of bounding boxes varies + between images of the same batch. + See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop + + Parameters + ---------- + batch : tuple + a tuple of 2 tuples, the first one holding all images in the batch, + and the second one holding the corresponding annotations. + + Returns + ------- + tuple + a tuple of length = batch size, made up of (image, annotations) + tuples. 
+ + """ + return tuple(zip(*batch, strict=True)) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Input data + +dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full") +annotations_dir = dataset_dir / "annotations" +annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json" + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Define a dataloader +# Define transforms for inference +inference_transforms = transforms.Compose( + [ + transforms.ToImage(), + transforms.ToDtype(torch.float32, scale=True), + ] +) + +# Create COCO dataset +# TODO: convert from ethology detections dataset to COCO dataset +dataset_coco = create_coco_dataset( + images_dir=Path(dataset_dir) / "frames", + annotations_file=annotations_file_path, + composed_transform=inference_transforms, +) + +# dataloader +dataloader = DataLoader( + dataset_coco, + batch_size=12, + shuffle=False, + num_workers=4, + collate_fn=collate_fn_varying_n_bboxes, + persistent_workers=True, + # multiprocessing_context="fork" + # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available() + # else None, # see https://github.com/pytorch/pytorch/issues/87688 +) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Define a YAML config file for the ensemble of trained detectors + +config = { + "models": { + "model_class": "fasterrcnn_resnet50_fpn_v2", + # imported from torchvision.models.detection + "model_kwargs": { + "num_classes": 2, + "weights": None, # null in YAML becomes None in Python + "weights_backbone": None, + }, + "checkpoints": [ + "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt", + "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt", + ], + }, + "fusion": { + "method": "wbf", + "iou_th_ensemble": 0.5, + "skip_box_th": 0.0001, + 
"n_jobs": 2, # workers for joblib.Parallel, n_workers should be <= number of CPU cores + # "confidence_threshold_post_fusion": 0.0, + # "max_n_detections": 300 + }, +} +config_file = "ensemble_of_detectors.yaml" +with open(config_file, "w") as f: + yaml.dump(config, f, sort_keys=False) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Load the ensemble of detectors +ensemble_detector = EnsembleDetector(config_file) +print(f"Ensemble detector is on device: {ensemble_detector.device}") + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Run the ensemble of detectors on a dataset +# Use Trainer for inference (this sets the device flexibly) +trainer = Trainer(accelerator="gpu", devices=1, logger=False) +raw_predictions = trainer.predict(ensemble_detector, dataloader) + +# format predictions as ethology detections dataset +fused_detections_ds = ensemble_detector.format_predictions(raw_predictions) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# # Evaluate the ensemble model +# # - load ground truth +# # - compute metrics + +# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") + + +# fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds( +# pred_bboxes_ds=fused_detections_ds, +# gt_bboxes_ds=gt_bboxes_ds, +# iou_threshold=0.1, # change to 0.5? 
+# ) + + +# print( +# "Ensemble model with confidence threshold post fusion: " +# f"{ensemble_detector.config['fusion']['confidence_threshold_post_fusion']}" +# ) +# print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}") +# print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}") diff --git a/pyproject.toml b/pyproject.toml index a2d5b2e9..ad9b345e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,13 +19,19 @@ classifiers = [ "License :: OSI Approved :: BSD License", ] dependencies = [ - "movement", + "xarray", + "pooch", + "pyyaml", "pandera[pandas]", "pycocotools", + "movement", "scikit-learn", "torch", "torchvision", + "ensemble-boxes", + "lightning", "loguru", + "joblib", ] [project.urls] From 77fc4fe380b0e7fad499ae52a6613df332924be7 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 13 Nov 2025 21:27:43 +0000 Subject: [PATCH 02/39] Add evalution --- ethology/detectors/__init__.py | 0 ethology/detectors/evaluate.py | 245 ++++++++++++++++++++++++++++++ examples/ensemble_of_detectors.py | 98 +++++++++--- 3 files changed, 322 insertions(+), 21 deletions(-) create mode 100644 ethology/detectors/__init__.py create mode 100644 ethology/detectors/evaluate.py diff --git a/ethology/detectors/__init__.py b/ethology/detectors/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/ethology/detectors/evaluate.py b/ethology/detectors/evaluate.py new file mode 100644 index 00000000..f991420c --- /dev/null +++ b/ethology/detectors/evaluate.py @@ -0,0 +1,245 @@ +"""Utilities for evaluating detectors.""" + +import numpy as np +import torch +import torchvision.ops as ops +import xarray as xr +from scipy.optimize import linear_sum_assignment + + +def evaluate_detections_hungarian_ds( + pred_bboxes_ds: xr.Dataset, + gt_bboxes_ds: xr.Dataset, + iou_threshold: float, +) -> tuple[xr.Dataset, xr.Dataset]: + """Compute true positives, false positives, and missed detections. 
+ + Uses Hungarian algorithm for matching. + """ + # Add xy_min and xy_max if not present + if all( + [ + var_str not in pred_bboxes_ds.variables + for var_str in ["xy_min", "xy_max"] + ] + ): + pred_bboxes_ds = _add_bboxes_min_max_corners(pred_bboxes_ds) + + if all( + [ + var_str not in gt_bboxes_ds.variables + for var_str in ["xy_min", "xy_max"] + ] + ): + gt_bboxes_ds = _add_bboxes_min_max_corners(gt_bboxes_ds) + + # Prepare input for hungarian + pred_bboxes_x1y1_x2y2 = xr.concat( + [pred_bboxes_ds.xy_min, pred_bboxes_ds.xy_max], dim="space" + ).transpose("image_id", "id", "space") + + gt_bboxes_x1y1_x2y2 = xr.concat( + [gt_bboxes_ds.xy_min, gt_bboxes_ds.xy_max], dim="space" + ).transpose("image_id", "id", "space") + + # rename id dimension in gt_bboxes_x1y1_x2y2 + gt_bboxes_x1y1_x2y2 = gt_bboxes_x1y1_x2y2.rename({"id": "id_gt"}) + + # Run hungarian vectorized + tp_array, fp_array, md_array, iou_tp_array = xr.apply_ufunc( + _evaluate_detections_hungarian_arrays, + pred_bboxes_x1y1_x2y2, + gt_bboxes_x1y1_x2y2, + kwargs={"iou_threshold": iou_threshold}, + input_core_dims=[ + ["id", "space"], + ["id_gt", "space"], + ], + output_core_dims=[ + ["id"], + ["id"], + ["id_gt"], + ["id"], + ], + vectorize=True, + exclude_dims={"id", "id_gt"}, + ) + + # Add to datasets + pred_bboxes_ds["tp"] = xr.DataArray(tp_array, dims=["image_id", "id"]) + pred_bboxes_ds["fp"] = xr.DataArray(fp_array, dims=["image_id", "id"]) + pred_bboxes_ds["iou_tp"] = xr.DataArray( + iou_tp_array, dims=["image_id", "id"] + ) + + # rename id dimension in md_array + md_array = md_array.rename({"id_gt": "id"}) + gt_bboxes_ds["md"] = xr.DataArray(md_array, dims=["image_id", "id"]) + + return pred_bboxes_ds, gt_bboxes_ds + + +def _evaluate_detections_hungarian_arrays( + pred_bboxes: np.ndarray, gt_bboxes: np.ndarray, iou_threshold: float +) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Compute true positives, false positives, and missed detections. 
+ + Uses Hungarian algorithm for matching and takes arrays of bboxes as input + in x1y1x2y2 format. + + Parameters + ---------- + pred_bboxes : np.ndarray + An array of prediction bounding boxes with the first four columns being + the coordinates of the bounding box in the format [x1, y1, x2, y2] + gt_bboxes : np.ndarray + An array of ground truth bounding boxes with the first four columns + being the coordinates of the bounding box in the format + [x1, y1, x2, y2] + iou_threshold : float + IoU threshold for considering a detection as true positive + + Returns + ------- + tuple + A tuple of four boolean arrays: + - true_positives: True for each predicted bbox that is a true positive + - false_positives: True for each predicted bbox that is a false + positive + - missed_detections: True for each ground truth bbox that is missed + - true_positives_iou: IoU of each true positive + + Notes + ----- + The output arrays are padded with False to the length of the original + arrays. This means that for example where the true_positives array is + False, that does not necessarily mean that the prediction is a false + positive. The same applies for the true_positives_iou array, which is + padded with nan. 
+ + """ + # Remove nan values + n_pred_bboxes_padded = pred_bboxes.shape[0] + n_gt_bboxes_padded = gt_bboxes.shape[0] + pred_bboxes = pred_bboxes[~np.isnan(pred_bboxes).any(axis=1), :] + gt_bboxes = gt_bboxes[~np.isnan(gt_bboxes).any(axis=1), :] + + # Initialize output arrays + true_positives = np.zeros(len(pred_bboxes), dtype=bool) + false_positives = np.zeros(len(pred_bboxes), dtype=bool) + matched_gts = np.zeros(len(gt_bboxes), dtype=bool) + missed_detections = np.zeros(len(gt_bboxes), dtype=bool) # unmatched gts + + true_positives_iou = np.zeros(len(pred_bboxes), dtype=float) + + # cast as a tensor if not already + if not isinstance(pred_bboxes, torch.Tensor): + pred_bboxes = torch.from_numpy(pred_bboxes).float() + if not isinstance(gt_bboxes, torch.Tensor): + gt_bboxes = torch.from_numpy(gt_bboxes).float() + + if len(pred_bboxes) > 0 and len(gt_bboxes) > 0: + # Compute IoU matrix (pred_bboxes x gt_bboxes) + iou_matrix = ops.box_iou(pred_bboxes[:, :4], gt_bboxes).cpu().numpy() + # iou_matrix[np.isnan(iou_matrix)] = -np.inf + + # Use Hungarian algorithm to find optimal assignment + pred_indices, gt_indices = linear_sum_assignment( + iou_matrix, maximize=True + ) + + # Mark true positives and false positives based on optimal assignment + for pred_idx, gt_idx in zip(pred_indices, gt_indices, strict=True): + if iou_matrix[pred_idx, gt_idx] > iou_threshold: + true_positives[pred_idx] = True + matched_gts[gt_idx] = True + true_positives_iou[pred_idx] = iou_matrix[pred_idx, gt_idx] + else: + false_positives[pred_idx] = True + + # Mark unmatched predictions as false positives + false_positives[~true_positives] = True + + # Mark unmatched ground truth as missed detections + missed_detections[~matched_gts] = True + + elif len(pred_bboxes) == 0 and len(gt_bboxes) > 0: + # No predictions, all ground truth are missed + missed_detections[:] = True + elif len(pred_bboxes) > 0 and len(gt_bboxes) == 0: + # No ground truth, all predictions are false positives + 
false_positives[:] = True + + # Pad tp, fp for pred_bboxes with False + tp_fp_pred_bboxes_padded: tuple[np.ndarray, ...] = () + for output in [true_positives, false_positives]: + output_padded = np.pad( + output, + (0, n_pred_bboxes_padded - len(output)), + mode="constant", + constant_values=False, + ) + tp_fp_pred_bboxes_padded += (output_padded,) + + # Pad true_positives_iou for pred_bboxes with nan + true_positives_iou_padded = np.pad( + true_positives_iou, + (0, n_pred_bboxes_padded - len(true_positives_iou)), + mode="constant", + constant_values=np.nan, + ) + + # Pad results for gt_bboxes with False + missed_detections_padded = np.pad( + missed_detections, + (0, n_gt_bboxes_padded - len(missed_detections)), + mode="constant", + constant_values=False, + ) + return tp_fp_pred_bboxes_padded + ( + missed_detections_padded, + true_positives_iou_padded, + ) + + +def compute_precision_recall_ds( + pred_bboxes_ds: xr.Dataset, + gt_bboxes_ds: xr.Dataset, + iou_threshold: float, +) -> tuple[xr.Dataset, xr.Dataset]: + """Compute precision and recall per image.""" + # Compute true positives, false positives, and missed detections + pred_bboxes_ds, gt_bboxes_ds = evaluate_detections_hungarian_ds( + pred_bboxes_ds=pred_bboxes_ds, + gt_bboxes_ds=gt_bboxes_ds, + iou_threshold=iou_threshold, + ) + + # Compute precision and recall per image + precision_per_img = pred_bboxes_ds.tp.sum(dim="id") / ( + pred_bboxes_ds.tp.sum(dim="id") + pred_bboxes_ds.fp.sum(dim="id") + ) + recall_per_img = pred_bboxes_ds.tp.sum(dim="id") / ( + pred_bboxes_ds.tp.sum(dim="id") + gt_bboxes_ds.md.sum(dim="id") + ) + + # Add to datasets + pred_bboxes_ds["precision"] = precision_per_img + pred_bboxes_ds["recall"] = recall_per_img + + return pred_bboxes_ds, gt_bboxes_ds + + +def _add_bboxes_min_max_corners(ds): + """Add xy_min and xy_max arrays to ds. + + # Compare to torchvision.ops.box_convert in testing? 
+ box_convert( + torch.from_numpy(np.c_[ds.position.T, ds.shape.T]), + in_fmt="cxcywh", + out_fmt="xyxy", + ) + """ + ds["xy_min"] = ds.position - 0.5 * ds.shape + ds["xy_max"] = ds.position + 0.5 * ds.shape + return ds diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index 564b8ca5..39a71dd3 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -11,12 +11,12 @@ from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 from ethology.detectors.ensembles.models import EnsembleDetector - -# from ethology.detectors.evaluate import compute_precision_recall_ds -# from ethology.io.annotations import load_bboxes +from ethology.detectors.evaluate import compute_precision_recall_ds +from ethology.io.annotations import load_bboxes # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + # Helper functions def create_coco_dataset( images_dir: str | Path, @@ -106,6 +106,11 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Define a YAML config file for the ensemble of trained detectors +experiment_ID = "617393114420881798" +ml_runs_experiment_dir = ( + Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID +) +last_ckpt = Path("checkpoints") / "last.ckpt" config = { "models": { @@ -117,15 +122,43 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: "weights_backbone": None, }, "checkpoints": [ - "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt", - "/home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt", + str( + ml_runs_experiment_dir + / "f348d9d196934073bece1b877cbc4d38" + / last_ckpt + ), # above_0th + str( + ml_runs_experiment_dir + / "879d2f77e2b24adcb06b87d2fede6a04" + / last_ckpt + ), # above_1st + str( + ml_runs_experiment_dir + 
/ "75583ec227e3444ab692b99c64795325" + / last_ckpt + ), # above_5th + str( + ml_runs_experiment_dir + / "4acc37206b1e4f679d535c837bee2c2f" + / last_ckpt + ), # above_10th + str( + ml_runs_experiment_dir + / "fdcf88fcbcc84fbeb94b45ca6b6f8914" + / last_ckpt + ), # above_25th + str( + ml_runs_experiment_dir + / "daa05ded0ea047388c9134bf044061c5" + / last_ckpt + ), # above_50th ], }, "fusion": { "method": "wbf", "iou_th_ensemble": 0.5, "skip_box_th": 0.0001, - "n_jobs": 2, # workers for joblib.Parallel, n_workers should be <= number of CPU cores + "n_jobs": -1, # workers for joblib.Parallel, n_workers should be <= number of CPU cores # "confidence_threshold_post_fusion": 0.0, # "max_n_detections": 300 }, @@ -150,23 +183,46 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# # Evaluate the ensemble model -# # - load ground truth -# # - compute metrics +# Remove low confidence detections +confidence_threshold_post_fusion = 0.5 +fused_detections_ds_ = fused_detections_ds.where( + fused_detections_ds.confidence >= confidence_threshold_post_fusion +) -# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Evaluate the ensemble model +# - load ground truth +# - compute metrics +gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") -# fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds( -# pred_bboxes_ds=fused_detections_ds, -# gt_bboxes_ds=gt_bboxes_ds, -# iou_threshold=0.1, # change to 0.5? 
-# ) +iou_threshold_tp = 0.25 +fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds( + pred_bboxes_ds=fused_detections_ds_, + gt_bboxes_ds=gt_bboxes_ds, + iou_threshold=iou_threshold_tp, +) +# All models on full August dataset, without removing low confidence detections: +# confidence_threshold_post_fusion = 0.0 +# Precision: 0.5920 +# Recall: 0.8455 +# --- +# confidence_threshold_post_fusion = 0.4 +# Precision: 0.8339 +# Recall: 0.7177 +# --- +# confidence_threshold_post_fusion = 0.5 +# Precision: 0.8714 +# Recall: 0.6624 +# --- + +print( + "Ensemble model with confidence threshold post fusion: " + f"{confidence_threshold_post_fusion:.2f}" +) +print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}") +print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}") -# print( -# "Ensemble model with confidence threshold post fusion: " -# f"{ensemble_detector.config['fusion']['confidence_threshold_post_fusion']}" -# ) -# print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}") -# print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}") +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Plot calibration curve From e70f70f4354be23ef4965bb50d4bad455e39035a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 14 Nov 2025 20:57:44 +0000 Subject: [PATCH 03/39] Split ensemble inference and fusion --- ethology/detectors/ensembles/fusion.py | 300 ++++++++++++++++++++++--- ethology/detectors/ensembles/models.py | 163 +++++++------- ethology/detectors/ensembles/utils.py | 85 +------ examples/ensemble_of_detectors.py | 50 ++++- 4 files changed, 391 insertions(+), 207 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 58521a69..e230aa18 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -1,40 +1,270 @@ """Wrappers around ensemble-boxes fusion functions.""" 
+ import numpy as np +import xarray as xr from ensemble_boxes import weighted_boxes_fusion -def weighted_boxes_fusion_in_pixels( - image_height_width: tuple[int, int], - boxes_list: list[np.ndarray], - scores_list: list[np.ndarray], - labels_list: list[np.ndarray], - iou_thr: float, - skip_box_thr: float, -): - """Fuse bboxes for a single image and return in pixels.""" - # Normalize boxes using image shape - image_height, image_width = image_height_width - boxes_list = [ - boxes - / np.array([image_width, image_height, image_width, image_height]) - if len(boxes) > 0 - else boxes - for boxes in boxes_list - ] - - # Apply WBF - fused_boxes, fused_scores, fused_labels = weighted_boxes_fusion( - boxes_list, - scores_list, - labels_list, - iou_thr=iou_thr, - skip_box_thr=skip_box_thr, - ) - - # Denormalize boxes - # Format of returned bboxes is x1y1x2y2 in pixels like fasterrcnn - fused_boxes = fused_boxes * np.array( - [image_width, image_height, image_width, image_height] - ) - - return fused_boxes, fused_scores, fused_labels \ No newline at end of file +# TODO: review shapes are ok in docstring +def _weighted_boxes_fusion_arrays( + position, # bboxes_x1y1: np.ndarray, # model, annot, 4 + shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 + confidence: np.ndarray, # model, annot + label: np.ndarray, # model, annot + image_width_height: np.ndarray, # = np.array([4096, 2160]), + iou_thr_ensemble: float = 0.5, + skip_box_thr: float = 0.0001, + max_n_detections: int = 300, + # confidence_th_post_fusion: float = 0.7, +) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: + """Wrap weighted boxes fusion to receive arrays as input. + + Weighted boxes fusion fused boxes of one image. + + Parameters + ---------- + position: np.ndarray + Detected positions of bounding boxes in a single image, with shape + 2, n_annot, n_models. + shape: np.ndarray + Detected shapes of bounding boxes in a single image, with shape + 2, n_annot, n_models. 
+ confidence: np.ndarray + Confidence scores for each bounding box, with shape + n_annotations, n_models. + label: np.ndarray + Labels for each bounding box, with shape n_annotations, n_models. + image_width_height: np.ndarray + Width and height of the image, with shape 2. + iou_thr_ensemble: float + IoU threshold for detections to be considered for fusion. + skip_box_thr: float + Threshold for skipping boxes with confidence below this value. + max_n_detections: int + Fused bounding boxes arrays are padded to this total number of boxes. + Its value should be larger than the expected maximum number of + detections per image after fusing across models. + confidence_th_post_fusion: float + Threshold for removing fused detections whose confidence is below + this value. + + Returns + ------- + tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray] + Tuple of xr.DataArrays containing the fused detections. The arrays + are padded to max_n_detections and contain the data for the centroid, + shape, confidence and label of the fused detections. 
+ + """ + # Prepare boxes array --> (position, shape) to x1y1x2y normalised + bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None] + bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None] + bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2]) + # 4, n_annot, n_models + + # Get list of bboxes per model + # arrays need to be tall for WBF + n_models = bboxes_x1y1_x2y2_normalised.shape[-1] + list_bboxes_per_model = [ + arr.squeeze() + for arr in np.split(bboxes_x1y1_x2y2_normalised, n_models, axis=-1) + ] + list_confidence_per_model = [ + arr.squeeze() for arr in np.split(confidence, n_models, axis=-1) + ] + list_label_per_model = [ + arr.squeeze() for arr in np.split(label, n_models, axis=-1) + ] + + # Remove rows with nan coordinates + list_bboxes_per_model = [ + arr[:, ~np.any(np.isnan(arr), axis=0)].T + for arr in list_bboxes_per_model + ] + list_confidence_per_model = [ + conf_arr[: bbox_arr.shape[0]] + for bbox_arr, conf_arr in zip( + list_bboxes_per_model, + list_confidence_per_model, + strict=True, + ) + ] + list_label_per_model = [ + label_arr[: bbox_arr.shape[0]] + for bbox_arr, label_arr in zip( + list_bboxes_per_model, + list_label_per_model, + strict=True, + ) + ] + # ------------------------------------ + # Run WBF on one image + ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( + weighted_boxes_fusion( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, + iou_thr=iou_thr_ensemble, + skip_box_thr=skip_box_thr, + ) + ) + + # ------------------------------------ + # Undo boxes x1y1 x2y2 normalization + ensemble_x1y1_x2y2 = ensemble_x1y1_x2y2_norm * np.tile( + image_width_height, (1, 2) + ) + + # Combine x1y1, x2y2, scores and labels in one array + ensemble_x1y2_x2y2_scores_labels = np.c_[ + ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels + ] + + # Remove rows with nan coordinates + ensemble_x1y2_x2y2_scores_labels = ensemble_x1y2_x2y2_scores_labels[ + 
~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1) + ] + + # Pad combined array to max_n_detections + # (this is required to concatenate across image_ids + ensemble_x1y2_x2y2_scores_labels = np.pad( + ensemble_x1y2_x2y2_scores_labels, + ( + (0, max_n_detections - ensemble_x1y2_x2y2_scores_labels.shape[0]), + (0, 0), + ), + "constant", + constant_values=np.nan, + ) + + # Format output as xarray dataarrays + centroid, shape, confidence, label = _x1y1_x2y2_as_da_tuple( + ensemble_x1y2_x2y2_scores_labels[:, 0:4], + ensemble_x1y2_x2y2_scores_labels[:, 4], + ensemble_x1y2_x2y2_scores_labels[:, 5], + ) + + return centroid, shape, confidence, label + + +def _x1y1_x2y2_as_da_tuple( + x1y1_x2y2_array: np.ndarray, + scores_array: np.ndarray, + labels_array: np.ndarray, + id_array: np.ndarray | None = None, +) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: + """Reshape detections / tracks array as xarray dataset. + + Input is detections array with shape [N, 4], x1y1x2y2 in pixels + """ + n_detections = x1y1_x2y2_array.shape[0] + if id_array is None: + id_array = np.arange(n_detections) + + # centroid dataarray + centroid_da = xr.DataArray( + data=0.5 + * ( + x1y1_x2y2_array[:, 0:2] + x1y1_x2y2_array[:, 2:4] + ).T, # space, annot ID + dims=["space", "id"], + coords={ + "space": ["x", "y"], + "id": id_array, + }, + ) + + # shape dataarray + shape_da = xr.DataArray( + data=( + x1y1_x2y2_array[:, 2:4] - x1y1_x2y2_array[:, 0:2] + ).T, # space, annot ID + dims=["space", "id"], + coords={ + "space": ["x", "y"], + "id": id_array, + }, + ) + + # confidence dataarray + confidence_da = xr.DataArray( + data=scores_array, + dims=["id"], + coords={"id": id_array}, + ) + + # label dataarray + label_da = xr.DataArray( + data=labels_array, + dims=["id"], + coords={"id": id_array}, + ) + + return centroid_da, shape_da, confidence_da, label_da + + +def WBF_across_models( + ensemble_detections_ds: xr.Dataset, + image_width_height: np.ndarray, + iou_thr_ensemble: float = 0.5, + 
skip_box_thr: float = 0.0001, + max_n_detections: int = 300, +) -> xr.Dataset: + """Fuse detections across models using WBF.""" + + wbf_kwargs = { + "iou_thr_ensemble": iou_thr_ensemble, + "skip_box_thr": skip_box_thr, + "max_n_detections": max_n_detections, + "image_width_height": image_width_height, + } + + # Run WBF across image_id + centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( + xr.apply_ufunc( + _weighted_boxes_fusion_arrays, + ensemble_detections_ds.position, # .data array is passed + ensemble_detections_ds.shape, + ensemble_detections_ds.confidence, + ensemble_detections_ds.label, + kwargs=wbf_kwargs, + input_core_dims=[ # do not broadcast across these + ["space", "id", "model"], + ["space", "id", "model"], + ["id", "model"], + ["id", "model"], + ], + output_core_dims=[ + ["space", "id"], + ["space", "id"], + ["id"], + ["id"], + ], + vectorize=True, + # loop over non-core dims (i.e. image_id); + # assumes function only takes arrays over core dims as input + exclude_dims={"id"}, + # to allow dimensions that change size btw input and output + ) + ) + + # Remove pad across annotations + centroid_fused_da = centroid_fused_da.dropna(dim="id", how="all") + shape_fused_da = shape_fused_da.dropna(dim="id", how="all") + confidence_fused_da = confidence_fused_da.dropna(dim="id", how="all") + label_fused_da = label_fused_da.dropna(dim="id", how="all") + + # Pad labels with -1 rather than nan + label_fused_da = label_fused_da.fillna(-1).astype(int) + + # Return a dataset + # FIX: why is id not a coordinate in the output dataset? 
+ # FIX: order of dimensions should be image_id, space, id + return xr.Dataset( + data_vars={ + "position": centroid_fused_da, + "shape": shape_fused_da, + "confidence": confidence_fused_da, + "label": label_fused_da, + } + ) diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index a42ce1ef..e738cc96 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -9,14 +9,9 @@ import torchvision.models.detection as detection_models import xarray as xr import yaml -from joblib import Parallel, delayed from lightning import LightningModule -from ethology.detectors.ensembles.fusion import weighted_boxes_fusion_in_pixels -from ethology.detectors.ensembles.utils import ( - arrays_to_ds_variables, - pad_to_max_first_dimension, -) +from ethology.detectors.ensembles.utils import pad_to_max_first_dimension class EnsembleDetector(LightningModule): @@ -26,6 +21,7 @@ class EnsembleDetector(LightningModule): ---------- config_file: str Path to the YAML config file. 
+ """ def __init__(self, config_file: str | Path): @@ -69,45 +65,6 @@ def load_models(self) -> nn.ModuleList: list_models.append(model) return nn.ModuleList(list_models) - def fuse_bboxes(self, images_batch, predictions_per_model: list[dict]): - """Fuse bboxes per sample in CPU in parallel.""" - # Fuse bboxes per sample in CPU in parallel - # Dispatch fusion tasks to executor (non-blocking) - # if self.config["fusion"]["method"] == "wbf" - - # n_jobs = -1 means Use ALL available CPU cores - # n_jobs = -2 means Use ALL available CPU cores except one - n_jobs = self.config["fusion"].get("n_jobs", -1) - - # Parallel WBF fusion - batch_size = len(images_batch) - results_batch = Parallel(n_jobs=n_jobs)( - delayed(weighted_boxes_fusion_in_pixels)( - images_batch[i].shape[-2:], # image height and width - [ - preds[i]["boxes"].cpu().numpy() - for preds in predictions_per_model - ], # same image across all models - [ - preds[i]["scores"].cpu().numpy() - for preds in predictions_per_model - ], - [ - preds[i]["labels"].cpu().numpy() - for preds in predictions_per_model - ], - self.config["fusion"]["iou_th_ensemble"], - self.config["fusion"]["skip_box_th"], - ) - for i in range(batch_size) - ) # list [(bboxes, scores, labels) * batch_size] - - fused_boxes_batch, fused_scores_batch, fused_labels_batch = ( - zip(*results_batch, strict=True) if results_batch else ([], [], []) - ) - - return fused_boxes_batch, fused_scores_batch, fused_labels_batch - def predict_step(self, batch, batch_idx): """Predict step for a single batch.""" # ------------------------------ @@ -115,50 +72,88 @@ def predict_step(self, batch, batch_idx): # TODO: can I vectorize this? 
# https://docs.pytorch.org/tutorials/intermediate/ensembling.html images_batch, _annotations_batch = batch - predictions_per_model = [ + raw_prediction_dicts_per_model = [ model(images_batch) for model in self.list_models ] # [num_models][batch_size] - # ------------------------------ - # Fuse bboxes per sample in CPU in parallel - fused_boxes_batch, fused_scores_batch, fused_labels_batch = ( - self.fuse_bboxes(images_batch, predictions_per_model) - ) + # Transpose to [batch_size][num_models] for easier downstream + # processing + raw_prediction_dicts_per_sample = [ + list(one_sample_all_models) + for one_sample_all_models in zip( + *raw_prediction_dicts_per_model, strict=True + ) + ] # [batch_size][num_models] + + return raw_prediction_dicts_per_sample + + def format_predictions(self) -> xr.Dataset: + """Format as ethology detections dataset with model axis.""" + # Get results from trainer + raw_predictions_per_model = self.trainer.predict_loop.predictions + + # Flatten batches + raw_prediction_dicts_per_sample = list( + chain.from_iterable(raw_predictions_per_model) + ) # [sample][model] + + # Parse output from dicts + output_per_sample = {"boxes": [], "scores": [], "labels": []} + for ky in output_per_sample: + output_per_sample[ky] = [ + [sample[m][ky] for m in range(len(self.list_models))] + for sample in raw_prediction_dicts_per_sample + ] # [sample][model] + + # Pad across models and across image_ids + fill_value = {"boxes": np.nan, "scores": np.nan, "labels": -1} + output_per_sample_padded = {ky: [] for ky in output_per_sample} + for ky in output_per_sample_padded: + output_per_sample_padded[ky] = pad_to_max_first_dimension( + [ + # pad across models + np.stack( + pad_to_max_first_dimension( + output_one_sample, fill_value[ky] + ), + axis=-1, + ) + for output_one_sample in output_per_sample[ky] + ], + fill_value[ky], + ) - return fused_boxes_batch, fused_scores_batch, fused_labels_batch - - @staticmethod - def format_predictions(raw_predictions): - 
"""Format as ethology detections dataset.""" - # Unzip data per batch - ( - fused_boxes_per_batch, - fused_scores_per_batch, - fused_labels_per_batch, - ) = zip(*raw_predictions, strict=True) # [n_batches][batch_size] - - # Flatten across all batches - fused_boxes = list(chain.from_iterable(fused_boxes_per_batch)) - fused_scores = list(chain.from_iterable(fused_scores_per_batch)) - fused_labels = list(chain.from_iterable(fused_labels_per_batch)) - - # Pad arrays to max n of detections per image - fused_boxes_padded = pad_to_max_first_dimension(fused_boxes) - fused_scores_padded = pad_to_max_first_dimension(fused_scores) - fused_labels_padded = pad_to_max_first_dimension(fused_labels) - - # Stack into arrays + # Stack and reorder dimensions bboxes_array = np.transpose( - np.stack(fused_boxes_padded), (0, -1, 1) - ) # image_id, space-4, id - scores_array = np.stack(fused_scores_padded) - labels_array = np.stack(fused_labels_padded) + np.stack(output_per_sample_padded["boxes"]), + (0, -2, 1, -1), + ) + scores_array = np.stack(output_per_sample_padded["scores"]) + labels_array = np.stack(output_per_sample_padded["labels"]) + # arrays of shape (image_id, 4/1, n_max_detections, n_models) + + # Compute centroid and shape arrays + centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4]) + shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2] - # ------------------------------ # Return as ethology detections dataset - ds_variables = arrays_to_ds_variables( - bboxes_array, scores_array, labels_array + max_n_detections = bboxes_array.shape[-2] + n_images = bboxes_array.shape[0] + + return xr.Dataset( + data_vars={ + "position": ( + ["image_id", "space", "id", "model"], + centroid_array, + ), + "shape": (["image_id", "space", "id", "model"], shape_array), + "confidence": (["image_id", "id", "model"], scores_array), + "label": (["image_id", "id", "model"], labels_array), + }, + coords={ + "image_id": np.arange(n_images), + "space": ["x", "y"], + "id": 
np.arange(max_n_detections), + "model": np.arange(len(self.list_models)), + }, ) - detections_ds = xr.Dataset(data_vars=ds_variables) - - return detections_ds diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py index f39956ed..53f24dce 100644 --- a/ethology/detectors/ensembles/utils.py +++ b/ethology/detectors/ensembles/utils.py @@ -1,4 +1,5 @@ """Utility functions for reshaping outputs of ensembles of detectors.""" + import numpy as np import xarray as xr @@ -10,7 +11,7 @@ def get_padding_width(array, max_n): return pad_width -def pad_to_max_first_dimension(list_arrays): +def pad_to_max_first_dimension(list_arrays, fill_value=np.nan): """Pad arrays to maximum number across all arrays in the first dimension.""" max_n_detections = max(array.shape[0] for array in list_arrays) list_arrays_padded = [ @@ -18,90 +19,10 @@ def pad_to_max_first_dimension(list_arrays): arr, get_padding_width(arr, max_n_detections), mode="constant", - constant_values=np.nan, + constant_values=fill_value, ) for arr in list_arrays ] return list_arrays_padded -def arrays_to_ds_variables( - bboxes_x1y1x2y2_array: np.ndarray, - scores_array: np.ndarray, - labels_array: np.ndarray, - id_array: np.ndarray | None = None, -) -> dict[str, xr.DataArray]: - """Convert arrays to dictionary of dataset variables. - - Parameters - ---------- - bboxes_x1y1x2y2_array: np.ndarray - Array of bounding box coordinates with shape - [Nimages, 4, Nmax_detections], in format x1y1x2y2 in units of pixels. - Nmax_detections is the maximum number of detections per image. - scores_array: np.ndarray - Array of shape [Nimages, Nmax_detections] - labels_array: np.ndarray - Array of shape [Nimages, Nmax_detections] - id_array: np.ndarray | None, optional - Array of shape [Nmax_detections]. If None, will be set to - range(Nmax_detections). 
- """ - n_images = bboxes_x1y1x2y2_array.shape[0] - n_max_detections = bboxes_x1y1x2y2_array.shape[-1] - if id_array is None: - id_array = np.arange(n_max_detections) - - # centroid dataarray (x, y) - centroid_da = xr.DataArray( - data=0.5 - * ( - bboxes_x1y1x2y2_array[:, 0:2, :] + bboxes_x1y1x2y2_array[:, 2:4, :] - ), - dims=["image_id", "space", "id"], - coords={ - "image_id": np.arange(n_images), - "space": ["x", "y"], - "id": id_array, - }, - ) - - # shape dataarray (width, height) - shape_da = xr.DataArray( - data=( - bboxes_x1y1x2y2_array[:, 2:4, :] - bboxes_x1y1x2y2_array[:, 0:2, :] - ), - dims=["image_id", "space", "id"], - coords={ - "image_id": np.arange(n_images), - "space": ["x", "y"], - "id": id_array, - }, - ) - - # confidence dataarray - confidence_da = xr.DataArray( - data=scores_array, - dims=["image_id", "id"], - coords={ - "image_id": np.arange(n_images), - "id": id_array, - }, - ) - - # label dataarray - label_da = xr.DataArray( - data=labels_array, - dims=["image_id", "id"], - coords={ - "image_id": np.arange(n_images), - "id": id_array, - }, - ) - - return { - "position": centroid_da, - "shape": shape_da, - "confidence": confidence_da, - "label": label_da, - } diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index 39a71dd3..144456b7 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -1,8 +1,10 @@ # %% # imports +from itertools import chain from pathlib import Path +import numpy as np import torch import torchvision.transforms.v2 as transforms import yaml @@ -10,6 +12,7 @@ from torch.utils.data import DataLoader from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 +from ethology.detectors.ensembles.fusion import WBF_across_models from ethology.detectors.ensembles.models import EnsembleDetector from ethology.detectors.evaluate import compute_precision_recall_ds from ethology.io.annotations import load_bboxes @@ -176,11 +179,26 @@ def 
collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Run the ensemble of detectors on a dataset # Use Trainer for inference (this sets the device flexibly) trainer = Trainer(accelerator="gpu", devices=1, logger=False) -raw_predictions = trainer.predict(ensemble_detector, dataloader) +_ = trainer.predict(ensemble_detector, dataloader) +# [batch][sample][model]- dict -# format predictions as ethology detections dataset -fused_detections_ds = ensemble_detector.format_predictions(raw_predictions) +# Format predictions as ethology detections dataset +# TODO: think about syntax of format_predictions (should it be instance or +# static method instead?) +ensemble_detections_ds = ensemble_detector.format_predictions() +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Fuse detections across models +# TODO: think whether joblib approach is more readable? +image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] + +fused_detections_ds = WBF_across_models( + ensemble_detections_ds, + image_width_height=image_width_height, + iou_thr_ensemble=0.5, + skip_box_thr=0.0001, + max_n_detections=300, +) # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Remove low confidence detections @@ -197,7 +215,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") iou_threshold_tp = 0.25 -fused_detections_ds, gt_bboxes_ds = compute_precision_recall_ds( +fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds( pred_bboxes_ds=fused_detections_ds_, gt_bboxes_ds=gt_bboxes_ds, iou_threshold=iou_threshold_tp, @@ -221,8 +239,28 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: "Ensemble model with confidence threshold post fusion: " f"{confidence_threshold_post_fusion:.2f}" ) -print(f"Precision: {fused_detections_ds.precision.mean().values:.4f}") -print(f"Recall: {fused_detections_ds.recall.mean().values:.4f}") 
+print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}") +print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}") # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Plot calibration curve + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Evaluate single models +list_detections_ds_eval = [] +for k in range(ensemble_detections_ds.sizes["model"]): + detections_ds, _ = compute_precision_recall_ds( + pred_bboxes_ds=ensemble_detections_ds.sel(model=k), + gt_bboxes_ds=gt_bboxes_ds, + iou_threshold=iou_threshold_tp + ) + list_detections_ds_eval.append(detections_ds) + + print(f"Model: {k}") + print(f"Precision: {detections_ds.precision.mean().values:.4f}") + print(f"Recall: {detections_ds.recall.mean().values:.4f}") + print("--------------------------------") + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Visualise detections From 7285fc7e1e61b160e1335d83c00791c53bd39288 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 18 Nov 2025 11:20:18 +0000 Subject: [PATCH 04/39] Simplify fusion module --- ethology/detectors/ensembles/fusion.py | 209 +++++++++++++++---------- examples/ensemble_of_detectors.py | 14 +- 2 files changed, 134 insertions(+), 89 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index e230aa18..0756bf8a 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -6,7 +6,7 @@ # TODO: review shapes are ok in docstring -def _weighted_boxes_fusion_arrays( +def _fuse_single_image_detections_WBF( position, # bboxes_x1y1: np.ndarray, # model, annot, 4 shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 confidence: np.ndarray, # model, annot @@ -17,9 +17,7 @@ def _weighted_boxes_fusion_arrays( max_n_detections: int = 300, # confidence_th_post_fusion: float = 0.7, ) -> tuple[xr.DataArray, xr.DataArray, 
xr.DataArray, xr.DataArray]: - """Wrap weighted boxes fusion to receive arrays as input. - - Weighted boxes fusion fused boxes of one image. + """Fuse detections across models for a single image using WBF. Parameters ---------- @@ -43,7 +41,7 @@ def _weighted_boxes_fusion_arrays( max_n_detections: int Fused bounding boxes arrays are padded to this total number of boxes. Its value should be larger than the expected maximum number of - detections per image after fusing across models. + detections per image **after** fusing across models. confidence_th_post_fusion: float Threshold for removing fused detections whose confidence is below this value. @@ -56,7 +54,50 @@ def _weighted_boxes_fusion_arrays( shape, confidence and label of the fused detections. """ - # Prepare boxes array --> (position, shape) to x1y1x2y normalised + # Prepare single image arrays for fusion + list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( + _preprocess_single_image_detections( + position, shape, confidence, label, image_width_height + ) + ) + + # ------------------------------------ + # Run WBF on one image + ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( + weighted_boxes_fusion( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, + iou_thr=iou_thr_ensemble, + skip_box_thr=skip_box_thr, + ) + ) + + # ------------------------------------ + + # Format output as xarray dataarrays + centroid_da, shape_da, confidence_da, label_da = ( + _postprocess_single_image_detections( + ensemble_x1y1_x2y2_norm, + ensemble_scores, + ensemble_labels, + image_width_height, + max_n_detections, + ) + ) + + return centroid_da, shape_da, confidence_da, label_da + + +def _preprocess_single_image_detections( + position: xr.DataArray, + shape: xr.DataArray, + confidence: xr.DataArray, + label: xr.DataArray, + image_width_height: np.ndarray, +) -> list[np.ndarray]: + """Prepare ensemble detections on a single image for fusion.""" + # Prepare boxes array 
--> position, shape arrays to x1y1x2y normalised bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None] bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None] bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2]) @@ -97,19 +138,22 @@ def _weighted_boxes_fusion_arrays( strict=True, ) ] - # ------------------------------------ - # Run WBF on one image - ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( - weighted_boxes_fusion( - list_bboxes_per_model, - list_confidence_per_model, - list_label_per_model, - iou_thr=iou_thr_ensemble, - skip_box_thr=skip_box_thr, - ) + + return ( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, ) - # ------------------------------------ + +def _postprocess_single_image_detections( + ensemble_x1y1_x2y2_norm, + ensemble_scores, + ensemble_labels, + image_width_height, + max_n_detections, +): + """Unnormalise, pad and format fused single-image detections as data arrays.""" # Undo boxes x1y1 x2y2 normalization ensemble_x1y1_x2y2 = ensemble_x1y1_x2y2_norm * np.tile( image_width_height, (1, 2) @@ -126,7 +170,7 @@ def _weighted_boxes_fusion_arrays( ] # Pad combined array to max_n_detections - # (this is required to concatenate across image_ids + # (this is required to concatenate across image_ids) ensemble_x1y2_x2y2_scores_labels = np.pad( ensemble_x1y2_x2y2_scores_labels, ( @@ -138,79 +182,83 @@ def _weighted_boxes_fusion_arrays( ) # Format output as xarray dataarrays - centroid, shape, confidence, label = _x1y1_x2y2_as_da_tuple( - ensemble_x1y2_x2y2_scores_labels[:, 0:4], - ensemble_x1y2_x2y2_scores_labels[:, 4], - ensemble_x1y2_x2y2_scores_labels[:, 5], + centroid_da, shape_da, confidence_da, label_da = ( + _single_image_detections_as_dataarrays( + ensemble_x1y2_x2y2_scores_labels[:, 0:4], + ensemble_x1y2_x2y2_scores_labels[:, 4], + ensemble_x1y2_x2y2_scores_labels[:, 5], + ) ) - return centroid, shape, confidence, label + return centroid_da, shape_da, 
confidence_da, label_da -def _x1y1_x2y2_as_da_tuple( +def _single_image_detections_as_dataarrays( x1y1_x2y2_array: np.ndarray, scores_array: np.ndarray, labels_array: np.ndarray, id_array: np.ndarray | None = None, ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: - """Reshape detections / tracks array as xarray dataset. - - Input is detections array with shape [N, 4], x1y1x2y2 in pixels - """ - n_detections = x1y1_x2y2_array.shape[0] + """Format single image fused detections as data arrays.""" if id_array is None: + n_detections = x1y1_x2y2_array.shape[0] id_array = np.arange(n_detections) - # centroid dataarray - centroid_da = xr.DataArray( - data=0.5 - * ( - x1y1_x2y2_array[:, 0:2] + x1y1_x2y2_array[:, 2:4] - ).T, # space, annot ID - dims=["space", "id"], - coords={ - "space": ["x", "y"], - "id": id_array, - }, - ) + # Extract bbox corner coordinates + x1y1, x2y2 = x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4] - # shape dataarray - shape_da = xr.DataArray( - data=( - x1y1_x2y2_array[:, 2:4] - x1y1_x2y2_array[:, 0:2] - ).T, # space, annot ID - dims=["space", "id"], - coords={ - "space": ["x", "y"], - "id": id_array, - }, - ) + # Shared coordinates + id_coords = {"id": id_array} + spatial_id_coords = {"space": ["x", "y"], **id_coords} - # confidence dataarray - confidence_da = xr.DataArray( - data=scores_array, - dims=["id"], - coords={"id": id_array}, + # Build all DataArrays + return ( + xr.DataArray( + (0.5 * (x1y1 + x2y2)).T, + dims=["space", "id"], + coords=spatial_id_coords, + ), + xr.DataArray( + (x2y2 - x1y1).T, dims=["space", "id"], coords=spatial_id_coords + ), + xr.DataArray(scores_array, dims=["id"], coords=id_coords), + xr.DataArray(labels_array, dims=["id"], coords=id_coords), ) - # label dataarray - label_da = xr.DataArray( - data=labels_array, - dims=["id"], - coords={"id": id_array}, - ) - return centroid_da, shape_da, confidence_da, label_da +def _postprocess_multi_image_fused_arrays( + position: xr.DataArray, + shape: 
xr.DataArray, + confidence: xr.DataArray, + label: xr.DataArray, +) -> dict: + """Postprocess fused data arrays on multiple images after fusion.""" + data_arrays = [position, shape, confidence, label] + # Remove padding across annotations + position_da, shape_da, confidence_da, label_da = [ + da.dropna(dim="id", how="all") for da in data_arrays + ] + + # Pad labels with -1 rather than nan + label_da = label_da.fillna(-1).astype(int) -def WBF_across_models( + return { + "position": position_da, + "shape": shape_da, + "confidence": confidence_da, + "label": label_da, + } + + +def fuse_ensemble_detections_WBF( ensemble_detections_ds: xr.Dataset, image_width_height: np.ndarray, iou_thr_ensemble: float = 0.5, skip_box_thr: float = 0.0001, max_n_detections: int = 300, ) -> xr.Dataset: - """Fuse detections across models using WBF.""" + """Fuse ensemble detections across models using WBF.""" wbf_kwargs = { "iou_thr_ensemble": iou_thr_ensemble, @@ -222,7 +270,7 @@ def WBF_across_models( # Run WBF across image_id centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( xr.apply_ufunc( - _weighted_boxes_fusion_arrays, + _fuse_single_image_detections_WBF, ensemble_detections_ds.position, # .data array is passed ensemble_detections_ds.shape, ensemble_detections_ds.confidence, @@ -248,23 +296,18 @@ def WBF_across_models( ) ) - # Remove pad across annotations - centroid_fused_da = centroid_fused_da.dropna(dim="id", how="all") - shape_fused_da = shape_fused_da.dropna(dim="id", how="all") - confidence_fused_da = confidence_fused_da.dropna(dim="id", how="all") - label_fused_da = label_fused_da.dropna(dim="id", how="all") - - # Pad labels with -1 rather than nan - label_fused_da = label_fused_da.fillna(-1).astype(int) + # Post process data arrays + fused_data_arrays = { + "position": centroid_fused_da, + "shape": shape_fused_da, + "confidence": confidence_fused_da, + "label": label_fused_da, + } + fused_data_arrays = _postprocess_multi_image_fused_arrays( + 
**fused_data_arrays + ) # Return a dataset # FIX: why is id not a coordinate in the output dataset? # FIX: order of dimensions should be image_id, space, id - return xr.Dataset( - data_vars={ - "position": centroid_fused_da, - "shape": shape_fused_da, - "confidence": confidence_fused_da, - "label": label_fused_da, - } - ) + return xr.Dataset(data_vars=fused_data_arrays) diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index 144456b7..7881bc8b 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -12,7 +12,7 @@ from torch.utils.data import DataLoader from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 -from ethology.detectors.ensembles.fusion import WBF_across_models +from ethology.detectors.ensembles.fusion import fuse_ensemble_detections_WBF from ethology.detectors.ensembles.models import EnsembleDetector from ethology.detectors.evaluate import compute_precision_recall_ds from ethology.io.annotations import load_bboxes @@ -182,17 +182,19 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: _ = trainer.predict(ensemble_detector, dataloader) # [batch][sample][model]- dict + # Format predictions as ethology detections dataset -# TODO: think about syntax of format_predictions (should it be instance or +# TODO: think about syntax of format_predictions (should it be instance or # static method instead?) +# Can it just be output from .predict? ensemble_detections_ds = ensemble_detector.format_predictions() # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Fuse detections across models # TODO: think whether joblib approach is more readable? 
-image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] +image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] -fused_detections_ds = WBF_across_models( +fused_detections_ds = fuse_ensemble_detections_WBF( ensemble_detections_ds, image_width_height=image_width_height, iou_thr_ensemble=0.5, @@ -249,11 +251,11 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Evaluate single models list_detections_ds_eval = [] -for k in range(ensemble_detections_ds.sizes["model"]): +for k in range(ensemble_detections_ds.sizes["model"]): detections_ds, _ = compute_precision_recall_ds( pred_bboxes_ds=ensemble_detections_ds.sel(model=k), gt_bboxes_ds=gt_bboxes_ds, - iou_threshold=iou_threshold_tp + iou_threshold=iou_threshold_tp, ) list_detections_ds_eval.append(detections_ds) From 5f98e4831d2184c5fe1dbbf46564996b0af845ca Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 18 Nov 2025 13:43:53 +0000 Subject: [PATCH 05/39] Adding other fusion methods --- ethology/detectors/ensembles/fusion.py | 259 ++++++++++++++++--------- ethology/detectors/ensembles/models.py | 3 +- examples/ensemble_of_detectors.py | 104 ++++++++-- 3 files changed, 255 insertions(+), 111 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 0756bf8a..a32d6252 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -2,93 +2,10 @@ import numpy as np import xarray as xr -from ensemble_boxes import weighted_boxes_fusion - - -# TODO: review shapes are ok in docstring -def _fuse_single_image_detections_WBF( - position, # bboxes_x1y1: np.ndarray, # model, annot, 4 - shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 - confidence: np.ndarray, # model, annot - label: np.ndarray, # model, annot - image_width_height: np.ndarray, # = np.array([4096, 2160]), - 
iou_thr_ensemble: float = 0.5, - skip_box_thr: float = 0.0001, - max_n_detections: int = 300, - # confidence_th_post_fusion: float = 0.7, -) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: - """Fuse detections across models for a single image using WBF. - - Parameters - ---------- - position: np.ndarray - Detected positions of bounding boxes in a single image, with shape - 2, n_annot, n_models. - shape: np.ndarray - Detected shapes of bounding boxes in a single image, with shape - 2, n_annot, n_models. - confidence: np.ndarray - Confidence scores for each bounding box, with shape - n_annotations, n_models. - label: np.ndarray - Labels for each bounding box, with shape n_annotations, n_models. - image_width_height: np.ndarray - Width and height of the image, with shape 2. - iou_thr_ensemble: float - IoU threshold for detections to be considered for fusion. - skip_box_thr: float - Threshold for skipping boxes with confidence below this value. - max_n_detections: int - Fused bounding boxes arrays are padded to this total number of boxes. - Its value should be larger than the expected maximum number of - detections per image **after** fusing across models. - confidence_th_post_fusion: float - Threshold for removing fused detections whose confidence is below - this value. - - Returns - ------- - tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray] - Tuple of xr.DataArrays containing the fused detections. The arrays - are padded to max_n_detections and contain the data for the centroid, - shape, confidence and label of the fused detections. 
- - """ - # Prepare single image arrays for fusion - list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( - _preprocess_single_image_detections( - position, shape, confidence, label, image_width_height - ) - ) - - # ------------------------------------ - # Run WBF on one image - ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( - weighted_boxes_fusion( - list_bboxes_per_model, - list_confidence_per_model, - list_label_per_model, - iou_thr=iou_thr_ensemble, - skip_box_thr=skip_box_thr, - ) - ) - - # ------------------------------------ - - # Format output as xarray dataarrays - centroid_da, shape_da, confidence_da, label_da = ( - _postprocess_single_image_detections( - ensemble_x1y1_x2y2_norm, - ensemble_scores, - ensemble_labels, - image_width_height, - max_n_detections, - ) - ) - - return centroid_da, shape_da, confidence_da, label_da +from ensemble_boxes import weighted_boxes_fusion, nms +# ----------- Helper functions --------------------------- def _preprocess_single_image_detections( position: xr.DataArray, shape: xr.DataArray, @@ -251,22 +168,20 @@ def _postprocess_multi_image_fused_arrays( } +# ------------------------------------- + + def fuse_ensemble_detections_WBF( ensemble_detections_ds: xr.Dataset, image_width_height: np.ndarray, - iou_thr_ensemble: float = 0.5, - skip_box_thr: float = 0.0001, - max_n_detections: int = 300, + max_n_detections: int, + wbf_kwargs: dict, + # iou_thr_ensemble: float = 0.5, + # skip_box_thr: float = 0.0001, + # max_n_detections: int = 300, ) -> xr.Dataset: """Fuse ensemble detections across models using WBF.""" - wbf_kwargs = { - "iou_thr_ensemble": iou_thr_ensemble, - "skip_box_thr": skip_box_thr, - "max_n_detections": max_n_detections, - "image_width_height": image_width_height, - } - # Run WBF across image_id centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( xr.apply_ufunc( @@ -275,7 +190,72 @@ def fuse_ensemble_detections_WBF( ensemble_detections_ds.shape, 
ensemble_detections_ds.confidence, ensemble_detections_ds.label, - kwargs=wbf_kwargs, + kwargs={ + "image_width_height": image_width_height, + "max_n_detections": max_n_detections, + **wbf_kwargs, + }, + input_core_dims=[ # do not broadcast across these + ["space", "id", "model"], + ["space", "id", "model"], + ["id", "model"], + ["id", "model"], + ], + output_core_dims=[ + ["space", "id"], + ["space", "id"], + ["id"], + ["id"], + ], + vectorize=True, + # loop over non-core dims (i.e. image_id); + # assumes function only takes arrays over core dims as input + exclude_dims={"id"}, + # to allow dimensions that change size btw input and output + ) + ) + + # Post process data arrays + fused_data_arrays = { + "position": centroid_fused_da, + "shape": shape_fused_da, + "confidence": confidence_fused_da, + "label": label_fused_da, + } + fused_data_arrays = _postprocess_multi_image_fused_arrays( + **fused_data_arrays + ) + + # Return a dataset + # FIX: why is id not a coordinate in the output dataset? 
+ # FIX: order of dimensions should be image_id, space, id + return xr.Dataset(data_vars=fused_data_arrays) + + +def fuse_ensemble_detections_NMS( + ensemble_detections_ds: xr.Dataset, + image_width_height: np.ndarray, + max_n_detections: int, + nms_kwargs: dict, + # iou_thr_ensemble: float = 0.5, + # skip_box_thr: float = 0.0001, + # max_n_detections: int = 300, +) -> xr.Dataset: + """Fuse ensemble detections across models using WBF.""" + + # Run WBF across image_id + centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( + xr.apply_ufunc( + _fuse_single_image_detections_NMS, + ensemble_detections_ds.position, # .data array is passed + ensemble_detections_ds.shape, + ensemble_detections_ds.confidence, + ensemble_detections_ds.label, + kwargs={ + "image_width_height": image_width_height, + "max_n_detections": max_n_detections, + **nms_kwargs, + }, input_core_dims=[ # do not broadcast across these ["space", "id", "model"], ["space", "id", "model"], @@ -311,3 +291,90 @@ def fuse_ensemble_detections_WBF( # FIX: why is id not a coordinate in the output dataset? 
# FIX: order of dimensions should be image_id, space, id return xr.Dataset(data_vars=fused_data_arrays) + + +# --------------- Single image --------------------------- +def _fuse_single_image_detections_WBF( + position, # bboxes_x1y1: np.ndarray, # model, annot, 4 + shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 + confidence: np.ndarray, # model, annot + label: np.ndarray, # model, annot + image_width_height: np.ndarray, # = np.array([4096, 2160]), + max_n_detections: int, + **wbf_kwargs: dict, # WBF only kwargs +) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: + """Fuse detections across models for a single image using WBF.""" + # Prepare single image arrays for fusion + list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( + _preprocess_single_image_detections( + position, shape, confidence, label, image_width_height + ) + ) + + # ------------------------------------ + # Run WBF on one image + ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( + weighted_boxes_fusion( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, + **wbf_kwargs, + ) + ) + + # ------------------------------------ + + # Format output as xarray dataarrays + centroid_da, shape_da, confidence_da, label_da = ( + _postprocess_single_image_detections( + ensemble_x1y1_x2y2_norm, + ensemble_scores, + ensemble_labels, + image_width_height, + max_n_detections, + ) + ) + + return centroid_da, shape_da, confidence_da, label_da + + +def _fuse_single_image_detections_NMS( + position, # bboxes_x1y1: np.ndarray, # model, annot, 4 + shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 + confidence: np.ndarray, # model, annot + label: np.ndarray, # model, annot + image_width_height: np.ndarray, # = np.array([4096, 2160]), + max_n_detections: int, + **nms_kwargs: dict, # NMS only kwargs +) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: + """Fuse detections across models for a single image using NMS.""" + # 
Prepare single image arrays for fusion + list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( + _preprocess_single_image_detections( + position, shape, confidence, label, image_width_height + ) + ) + + # ------------------------------------ + # Run WBF on one image + ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = nms( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, + **nms_kwargs, + ) + + # ------------------------------------ + + # Format output as xarray dataarrays + centroid_da, shape_da, confidence_da, label_da = ( + _postprocess_single_image_detections( + ensemble_x1y1_x2y2_norm, + ensemble_scores, + ensemble_labels, + image_width_height, + max_n_detections, + ) + ) + + return centroid_da, shape_da, confidence_da, label_da diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index e738cc96..2fbf2835 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -87,7 +87,7 @@ def predict_step(self, batch, batch_idx): return raw_prediction_dicts_per_sample - def format_predictions(self) -> xr.Dataset: + def format_predictions(self, attrs: dict) -> xr.Dataset: """Format as ethology detections dataset with model axis.""" # Get results from trainer raw_predictions_per_model = self.trainer.predict_loop.predictions @@ -156,4 +156,5 @@ def format_predictions(self) -> xr.Dataset: "id": np.arange(max_n_detections), "model": np.arange(len(self.list_models)), }, + attrs=attrs if attrs else {}, ) diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index 7881bc8b..da189253 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -1,22 +1,29 @@ # %% # imports -from itertools import chain from pathlib import Path import numpy as np import torch import torchvision.transforms.v2 as transforms +import xarray as xr import yaml from lightning import Trainer +from matplotlib 
import pyplot as plt +from PIL import Image from torch.utils.data import DataLoader from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 -from ethology.detectors.ensembles.fusion import fuse_ensemble_detections_WBF +from ethology.detectors.ensembles.fusion import ( + fuse_ensemble_detections_NMS, + fuse_ensemble_detections_WBF, +) from ethology.detectors.ensembles.models import EnsembleDetector from ethology.detectors.evaluate import compute_precision_recall_ds from ethology.io.annotations import load_bboxes +# %% +# %matplotlib widget # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% @@ -71,6 +78,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Input data dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full") +images_dir = dataset_dir / "frames" annotations_dir = dataset_dir / "annotations" annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json" @@ -87,6 +95,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Create COCO dataset # TODO: convert from ethology detections dataset to COCO dataset +# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") dataset_coco = create_coco_dataset( images_dir=Path(dataset_dir) / "frames", annotations_file=annotations_file_path, @@ -159,11 +168,13 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: }, "fusion": { "method": "wbf", - "iou_th_ensemble": 0.5, - "skip_box_th": 0.0001, - "n_jobs": -1, # workers for joblib.Parallel, n_workers should be <= number of CPU cores + "method_kwargs": { # arguments as in ensemble_boxes.weighted_boxes_fusion + "iou_thr": 0.5, # iou threshold for the ensemble + "skip_box_thr": 0.0001, + }, + # "n_jobs": -1, # workers for joblib.Parallel, n_workers should be <= number of CPU cores # "confidence_threshold_post_fusion": 0.0, - # "max_n_detections": 300 + "max_n_detections": 300, }, } config_file = "ensemble_of_detectors.yaml" @@ -183,25 +194,90 @@ def 
collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # [batch][sample][model]- dict -# Format predictions as ethology detections dataset +# +# Format predictions as ethology detections dataset and add attrs # TODO: think about syntax of format_predictions (should it be instance or # static method instead?) -# Can it just be output from .predict? -ensemble_detections_ds = ensemble_detector.format_predictions() +# Q: Can it just be output from .predict? +# TODO: dataloader to ethology detections dataset +gt_bboxes_ds = load_bboxes.from_files( + annotations_file_path, format="COCO", images_dirs=images_dir +) +ensemble_detections_ds = ensemble_detector.format_predictions( + attrs=gt_bboxes_ds.attrs +) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Some nice plots: +# ensemble_detections_ds.confidence.sel(image_id=0).plot() +# ensemble_detections_ds.confidence.sel(model=0).plot() +for m in range(5): + plt.figure() + ensemble_detections_ds.confidence.sel(model=m).plot() + + +# %%%%%%%% +# All models predict less boxes and have less avg confidence per image in +# image_ids from 350 to 450. Let's inspect video names and images for these +# samples. + +# Add video name array +video_name = [ + ensemble_detections_ds.map_image_id_to_filename[img_id].split("_frame")[0] + for img_id in ensemble_detections_ds.image_id.values +] +ensemble_detections_ds["video"] = xr.DataArray(video_name, dims="image_id") + +# which videos? 
+np.unique(ensemble_detections_ds.video.sel(image_id=range(350, 450)).values) + +# %%%%%% +# Visualise image +for image_id in range(350, 450, 10): + image_filename = ensemble_detections_ds.map_image_id_to_filename[image_id] + image_path = ensemble_detections_ds.images_directories / image_filename + + # img = Image.open(image_path) + img = plt.imread(image_path) + + plt.figure() + plt.imshow(img) + plt.title(f"{image_filename}") # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Fuse detections across models +# Fuse detections across models with WBF # TODO: think whether joblib approach is more readable? image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] +config_fusion = config["fusion"] + fused_detections_ds = fuse_ensemble_detections_WBF( ensemble_detections_ds, image_width_height=image_width_height, - iou_thr_ensemble=0.5, - skip_box_thr=0.0001, - max_n_detections=300, + max_n_detections=config_fusion["max_n_detections"], + wbf_kwargs=config_fusion["method_kwargs"], + # should be larger than expected maximum number of detections after fusion + # ---- method kwargs ---- +) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Fuse detections across models with NMS + +config_fusion = config["fusion"] + +fused_detections_nms_ds = fuse_ensemble_detections_NMS( + ensemble_detections_ds, + image_width_height=image_width_height, + max_n_detections=config_fusion["max_n_detections"], + nms_kwargs={ + "iou_thr": config_fusion["method_kwargs"]["iou_thr"], + }, + # should be larger than expected maximum number of detections after fusion + # ---- method kwargs ---- ) +# fused_detections_ds = fused_detections_nms_ds # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Remove low confidence detections confidence_threshold_post_fusion = 0.5 @@ -214,7 +290,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # - load ground truth # - compute metrics -gt_bboxes_ds = 
load_bboxes.from_files(annotations_file_path, format="COCO") +# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") iou_threshold_tp = 0.25 fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds( From fdb6901908f27f831cadb72c0bdc51e95dcea716 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:43:02 +0000 Subject: [PATCH 06/39] Add general fusion functions. Add error for insufficient max n of detections. Add types for kwargs. Add image shape validation --- ethology/detectors/ensembles/fusion.py | 427 +++++++++++++------------ ethology/detectors/ensembles/models.py | 4 +- examples/ensemble_of_detectors.py | 37 +-- 3 files changed, 236 insertions(+), 232 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index a32d6252..59b7e64a 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -2,10 +2,171 @@ import numpy as np import xarray as xr -from ensemble_boxes import weighted_boxes_fusion, nms + +import ensemble_boxes + +from typing import Callable, Optional, Literal +from functools import partial +from typing import TypedDict, Unpack + +VALID_FUSION_METHODS = { + "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion, + "nms": ensemble_boxes.nms, + "soft_nms": ensemble_boxes.soft_nms, + "non_maxium_weighted": ensemble_boxes.non_maximum_weighted, +} + +class _TypeFusionKwargs(TypedDict, total=False): + """Type hints for fusion method kwargs. + + Parameters for methods as described in the ensemble_boxes documentation. + See https://github.com/ZFTurbo/Weighted-Boxes-Fusion + + Parameters + ---------- + weights: list[float] + Weights for each model. + iou_thr: float + IoU threshold for detections to be considered a true positive + during fusion. + skip_box_thr: float + Exclude from fusion boxes with confidence below this value. + sigma: float + Sigma for soft NMS. 
+ thresh: float + Threshold for boxes to keep after soft NMS. + conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"] + Method to compute the confidence score of the fused detections. + - "avg": Average confidence score of the fused detections (default). + - 'box_and_model_avg': box and model wise hybrid weighted average. + - 'absent_model_aware_avg': weighted average that takes into account the absent model. + allows_overflow: bool + Whether to allow the confidence score of the fused detections to exceed 1. + """ + weights: list[float] | None + iou_thr: float + skip_box_thr: float + sigma: float + thresh: float + conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"] + allows_overflow: bool + +# TODO: +# @decorator-that-checks-output-is-a-detections-dataset +def fuse_ensemble_detections( + ensemble_detections_ds: xr.Dataset, + fusion_method: Literal["weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"], + fusion_method_kwargs: Optional[dict] = None, + max_n_detections: int = 500, +) -> xr.Dataset: + """Fuse ensemble detections across models using WBF.""" + # Check if image_width_height defined in dataset + image_shape = ensemble_detections_ds.attrs.get("image_shape") + if image_shape is None: + raise KeyError( + "Required attribute 'image_shape' not found in the dataset attributes. " + "Please ensure the dataset has 'image_shape' (width, height in pixels) " + "in its attributes." + ) + else: + image_width_height = _validate_image_shape(image_shape) + + # Build single-image partial fusion function for the selected method + if fusion_method not in VALID_FUSION_METHODS: + raise ValueError( + f"Invalid fusion method: {fusion_method}. 
" + f"Valid methods are: {list(VALID_FUSION_METHODS.keys())}" + ) + fusion_function = VALID_FUSION_METHODS[fusion_method] + _fuse_single_image_detections_partial = partial( + _fuse_single_image_detections, fusion_function + ) + + # Prepare kwargs for fusion function + if not fusion_method_kwargs: + fusion_method_kwargs = {} + + # Run fusion across image_id using apply_ufunc + centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( + xr.apply_ufunc( + _fuse_single_image_detections_partial, + ensemble_detections_ds.position, # .data array is passed + ensemble_detections_ds.shape, + ensemble_detections_ds.confidence, + ensemble_detections_ds.label, + kwargs={ + "image_width_height": image_width_height, + "max_n_detections": max_n_detections, + **fusion_method_kwargs, + }, + input_core_dims=[ # do not broadcast across these + ["space", "id", "model"], # centroid + ["space", "id", "model"], # shape + ["id", "model"], # confidence + ["id", "model"], # label + ], + output_core_dims=[ # do not broadcast across these + ["space", "id"], # centroid + ["space", "id"], # shape + ["id"], # confidence + ["id"], # label + ], + vectorize=True, + # TODO: can I avoid vectorize? + # loop over non-core dims (i.e. image_id); + # assumes function only takes arrays over core dims as input + exclude_dims={"id"}, + # to allow dimensions that change size between input and output + ) + ) + + # Post process data arrays + fused_data_arrays = { + "position": centroid_fused_da, + "shape": shape_fused_da, + "confidence": confidence_fused_da, + "label": label_fused_da, + } + fused_data_arrays = _postprocess_multi_image_fused_arrays( + **fused_data_arrays + ) + + # Return a dataset + return xr.Dataset(data_vars=fused_data_arrays) + + +def _validate_image_shape(image_shape) -> np.ndarray: + """Validate and convert image shape to numpy array. + + Args: + image_shape: Image dimensions as (width, height). + Should be array-like with 2 elements. 
+ + Returns: + np.ndarray: Validated image shape as 1D array with 2 elements. + + Raises: + ValueError: If image_shape cannot be converted to a valid shape. + """ + try: + image_shape = np.asarray(image_shape) + except (TypeError, ValueError) as e: + raise ValueError( + f"Cannot convert 'image_shape' to array: {e}. " + "Expected format: (width, height) as tuple or array-like." + ) from e + + # Flatten to handle (2,), (1,2) and (2,1) shapes + image_shape = image_shape.flatten() + if image_shape.shape != (2,): + raise ValueError( + f"'image_shape' must have exactly 2 elements (width, height), " + f"got shape {image_shape.shape}" + ) + + return image_shape -# ----------- Helper functions --------------------------- def _preprocess_single_image_detections( position: xr.DataArray, shape: xr.DataArray, @@ -86,6 +247,15 @@ def _postprocess_single_image_detections( ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1) ] + # Check padding + if ensemble_x1y2_x2y2_scores_labels.shape[0] > max_n_detections: + raise ValueError( + "Insufficient padding provided. " + f"The estimated maximum number of detections per image was set to {max_n_detections}, " + f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were found in one of the images " + "after fusion. Please increase the maximum number of detections per image." 
+ ) + # Pad combined array to max_n_detections # (this is required to concatenate across image_ids) ensemble_x1y2_x2y2_scores_labels = np.pad( @@ -109,6 +279,50 @@ def _postprocess_single_image_detections( return centroid_da, shape_da, confidence_da, label_da +def _fuse_single_image_detections( + fusion_function: Callable, + position, + shape, + confidence: np.ndarray, + label: np.ndarray, + image_width_height: np.ndarray, + max_n_detections: int, + **fusion_kwargs: Unpack[_TypeFusionKwargs], # method-only kwargs +) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: + """Fuse detections across models for a single image using WBF.""" + # Prepare single image arrays for fusion + list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( + _preprocess_single_image_detections( + position, shape, confidence, label, image_width_height + ) + ) + + # ------------------------------------ + # Run WBF on one image + ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( + fusion_function( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, + **fusion_kwargs, + ) + ) + + # ------------------------------------ + + # Format output as xarray dataarrays + centroid_da, shape_da, confidence_da, label_da = ( + _postprocess_single_image_detections( + ensemble_x1y1_x2y2_norm, + ensemble_scores, + ensemble_labels, + image_width_height, + max_n_detections, + ) + ) + + return centroid_da, shape_da, confidence_da, label_da + def _single_image_detections_as_dataarrays( x1y1_x2y2_array: np.ndarray, @@ -168,213 +382,4 @@ def _postprocess_multi_image_fused_arrays( } -# ------------------------------------- - -def fuse_ensemble_detections_WBF( - ensemble_detections_ds: xr.Dataset, - image_width_height: np.ndarray, - max_n_detections: int, - wbf_kwargs: dict, - # iou_thr_ensemble: float = 0.5, - # skip_box_thr: float = 0.0001, - # max_n_detections: int = 300, -) -> xr.Dataset: - """Fuse ensemble detections across models using 
WBF.""" - - # Run WBF across image_id - centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( - xr.apply_ufunc( - _fuse_single_image_detections_WBF, - ensemble_detections_ds.position, # .data array is passed - ensemble_detections_ds.shape, - ensemble_detections_ds.confidence, - ensemble_detections_ds.label, - kwargs={ - "image_width_height": image_width_height, - "max_n_detections": max_n_detections, - **wbf_kwargs, - }, - input_core_dims=[ # do not broadcast across these - ["space", "id", "model"], - ["space", "id", "model"], - ["id", "model"], - ["id", "model"], - ], - output_core_dims=[ - ["space", "id"], - ["space", "id"], - ["id"], - ["id"], - ], - vectorize=True, - # loop over non-core dims (i.e. image_id); - # assumes function only takes arrays over core dims as input - exclude_dims={"id"}, - # to allow dimensions that change size btw input and output - ) - ) - - # Post process data arrays - fused_data_arrays = { - "position": centroid_fused_da, - "shape": shape_fused_da, - "confidence": confidence_fused_da, - "label": label_fused_da, - } - fused_data_arrays = _postprocess_multi_image_fused_arrays( - **fused_data_arrays - ) - - # Return a dataset - # FIX: why is id not a coordinate in the output dataset? 
- # FIX: order of dimensions should be image_id, space, id - return xr.Dataset(data_vars=fused_data_arrays) - - -def fuse_ensemble_detections_NMS( - ensemble_detections_ds: xr.Dataset, - image_width_height: np.ndarray, - max_n_detections: int, - nms_kwargs: dict, - # iou_thr_ensemble: float = 0.5, - # skip_box_thr: float = 0.0001, - # max_n_detections: int = 300, -) -> xr.Dataset: - """Fuse ensemble detections across models using WBF.""" - - # Run WBF across image_id - centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( - xr.apply_ufunc( - _fuse_single_image_detections_NMS, - ensemble_detections_ds.position, # .data array is passed - ensemble_detections_ds.shape, - ensemble_detections_ds.confidence, - ensemble_detections_ds.label, - kwargs={ - "image_width_height": image_width_height, - "max_n_detections": max_n_detections, - **nms_kwargs, - }, - input_core_dims=[ # do not broadcast across these - ["space", "id", "model"], - ["space", "id", "model"], - ["id", "model"], - ["id", "model"], - ], - output_core_dims=[ - ["space", "id"], - ["space", "id"], - ["id"], - ["id"], - ], - vectorize=True, - # loop over non-core dims (i.e. image_id); - # assumes function only takes arrays over core dims as input - exclude_dims={"id"}, - # to allow dimensions that change size btw input and output - ) - ) - - # Post process data arrays - fused_data_arrays = { - "position": centroid_fused_da, - "shape": shape_fused_da, - "confidence": confidence_fused_da, - "label": label_fused_da, - } - fused_data_arrays = _postprocess_multi_image_fused_arrays( - **fused_data_arrays - ) - - # Return a dataset - # FIX: why is id not a coordinate in the output dataset? 
- # FIX: order of dimensions should be image_id, space, id - return xr.Dataset(data_vars=fused_data_arrays) - - -# --------------- Single image --------------------------- -def _fuse_single_image_detections_WBF( - position, # bboxes_x1y1: np.ndarray, # model, annot, 4 - shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 - confidence: np.ndarray, # model, annot - label: np.ndarray, # model, annot - image_width_height: np.ndarray, # = np.array([4096, 2160]), - max_n_detections: int, - **wbf_kwargs: dict, # WBF only kwargs -) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: - """Fuse detections across models for a single image using WBF.""" - # Prepare single image arrays for fusion - list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( - _preprocess_single_image_detections( - position, shape, confidence, label, image_width_height - ) - ) - - # ------------------------------------ - # Run WBF on one image - ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( - weighted_boxes_fusion( - list_bboxes_per_model, - list_confidence_per_model, - list_label_per_model, - **wbf_kwargs, - ) - ) - - # ------------------------------------ - - # Format output as xarray dataarrays - centroid_da, shape_da, confidence_da, label_da = ( - _postprocess_single_image_detections( - ensemble_x1y1_x2y2_norm, - ensemble_scores, - ensemble_labels, - image_width_height, - max_n_detections, - ) - ) - - return centroid_da, shape_da, confidence_da, label_da - - -def _fuse_single_image_detections_NMS( - position, # bboxes_x1y1: np.ndarray, # model, annot, 4 - shape, # bboxes_x2y2: np.ndarray, # model, annot, 4 - confidence: np.ndarray, # model, annot - label: np.ndarray, # model, annot - image_width_height: np.ndarray, # = np.array([4096, 2160]), - max_n_detections: int, - **nms_kwargs: dict, # NMS only kwargs -) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: - """Fuse detections across models for a single image using NMS.""" - # 
Prepare single image arrays for fusion - list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( - _preprocess_single_image_detections( - position, shape, confidence, label, image_width_height - ) - ) - - # ------------------------------------ - # Run WBF on one image - ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = nms( - list_bboxes_per_model, - list_confidence_per_model, - list_label_per_model, - **nms_kwargs, - ) - - # ------------------------------------ - - # Format output as xarray dataarrays - centroid_da, shape_da, confidence_da, label_da = ( - _postprocess_single_image_detections( - ensemble_x1y1_x2y2_norm, - ensemble_scores, - ensemble_labels, - image_width_height, - max_n_detections, - ) - ) - - return centroid_da, shape_da, confidence_da, label_da diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 2fbf2835..412b9fbc 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -87,7 +87,9 @@ def predict_step(self, batch, batch_idx): return raw_prediction_dicts_per_sample - def format_predictions(self, attrs: dict) -> xr.Dataset: + # TODO: + # @decorator-that-checks-output-is-a-detections-dataset + def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: """Format as ethology detections dataset with model axis.""" # Get results from trainer raw_predictions_per_model = self.trainer.predict_loop.predictions diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index da189253..71a9336f 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -14,10 +14,7 @@ from torch.utils.data import DataLoader from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 -from ethology.detectors.ensembles.fusion import ( - fuse_ensemble_detections_NMS, - fuse_ensemble_detections_WBF, -) +from ethology.detectors.ensembles.fusion import fuse_ensemble_detections 
from ethology.detectors.ensembles.models import EnsembleDetector from ethology.detectors.evaluate import compute_precision_recall_ds from ethology.io.annotations import load_bboxes @@ -167,7 +164,8 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: ], }, "fusion": { - "method": "wbf", + "method": "weighted_boxes_fusion", + # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted" "method_kwargs": { # arguments as in ensemble_boxes.weighted_boxes_fusion "iou_thr": 0.5, # iou threshold for the ensemble "skip_box_thr": 0.0001, @@ -191,10 +189,8 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Use Trainer for inference (this sets the device flexibly) trainer = Trainer(accelerator="gpu", devices=1, logger=False) _ = trainer.predict(ensemble_detector, dataloader) -# [batch][sample][model]- dict -# # Format predictions as ethology detections dataset and add attrs # TODO: think about syntax of format_predictions (should it be instance or # static method instead?) @@ -249,14 +245,17 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Fuse detections across models with WBF # TODO: think whether joblib approach is more readable? 
image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] +ensemble_detections_ds.attrs['image_shape'] = image_width_height config_fusion = config["fusion"] -fused_detections_ds = fuse_ensemble_detections_WBF( + +# %% +fused_detections_ds = fuse_ensemble_detections( ensemble_detections_ds, - image_width_height=image_width_height, - max_n_detections=config_fusion["max_n_detections"], - wbf_kwargs=config_fusion["method_kwargs"], + fusion_method=config_fusion['method'], + fusion_method_kwargs=config_fusion["method_kwargs"], + # max_n_detections=config_fusion["max_n_detections"], # should be larger than expected maximum number of detections after fusion # ---- method kwargs ---- ) @@ -264,20 +263,18 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Fuse detections across models with NMS -config_fusion = config["fusion"] - -fused_detections_nms_ds = fuse_ensemble_detections_NMS( +fused_detections_nms_ds = fuse_ensemble_detections( ensemble_detections_ds, - image_width_height=image_width_height, - max_n_detections=config_fusion["max_n_detections"], - nms_kwargs={ + fusion_method='soft_nms', + fusion_method_kwargs={ "iou_thr": config_fusion["method_kwargs"]["iou_thr"], + "sigma":0.5, + "thresh":0.001 }, - # should be larger than expected maximum number of detections after fusion - # ---- method kwargs ---- + max_n_detections=500 ) -# fused_detections_ds = fused_detections_nms_ds +fused_detections_ds = fused_detections_nms_ds # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Remove low confidence detections confidence_threshold_post_fusion = 0.5 From 7deace18c95f73946a30a7d34d27c3795126e24e Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:50:31 +0000 Subject: [PATCH 07/39] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- 
ethology/detectors/ensembles/fusion.py | 68 ++++++++++++++------------ ethology/detectors/ensembles/utils.py | 3 -- examples/ensemble_of_detectors.py | 13 +++-- 3 files changed, 42 insertions(+), 42 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 59b7e64a..913f34c0 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -1,13 +1,12 @@ """Wrappers around ensemble-boxes fusion functions.""" -import numpy as np -import xarray as xr +from collections.abc import Callable +from functools import partial +from typing import Literal, TypedDict, Unpack import ensemble_boxes - -from typing import Callable, Optional, Literal -from functools import partial -from typing import TypedDict, Unpack +import numpy as np +import xarray as xr VALID_FUSION_METHODS = { "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion, @@ -16,18 +15,19 @@ "non_maxium_weighted": ensemble_boxes.non_maximum_weighted, } + class _TypeFusionKwargs(TypedDict, total=False): """Type hints for fusion method kwargs. Parameters for methods as described in the ensemble_boxes documentation. See https://github.com/ZFTurbo/Weighted-Boxes-Fusion - + Parameters ---------- weights: list[float] Weights for each model. iou_thr: float - IoU threshold for detections to be considered a true positive + IoU threshold for detections to be considered a true positive during fusion. skip_box_thr: float Exclude from fusion boxes with confidence below this value. @@ -42,7 +42,9 @@ class _TypeFusionKwargs(TypedDict, total=False): - 'absent_model_aware_avg': weighted average that takes into account the absent model. allows_overflow: bool Whether to allow the confidence score of the fused detections to exceed 1. 
+ """ + weights: list[float] | None iou_thr: float skip_box_thr: float @@ -51,12 +53,15 @@ class _TypeFusionKwargs(TypedDict, total=False): conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"] allows_overflow: bool + # TODO: # @decorator-that-checks-output-is-a-detections-dataset def fuse_ensemble_detections( ensemble_detections_ds: xr.Dataset, - fusion_method: Literal["weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted"], - fusion_method_kwargs: Optional[dict] = None, + fusion_method: Literal[ + "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted" + ], + fusion_method_kwargs: dict | None = None, max_n_detections: int = 500, ) -> xr.Dataset: """Fuse ensemble detections across models using WBF.""" @@ -100,16 +105,16 @@ def fuse_ensemble_detections( **fusion_method_kwargs, }, input_core_dims=[ # do not broadcast across these - ["space", "id", "model"], # centroid - ["space", "id", "model"], # shape - ["id", "model"], # confidence - ["id", "model"], # label + ["space", "id", "model"], # centroid + ["space", "id", "model"], # shape + ["id", "model"], # confidence + ["id", "model"], # label ], - output_core_dims=[ # do not broadcast across these - ["space", "id"], # centroid - ["space", "id"], # shape - ["id"], # confidence - ["id"], # label + output_core_dims=[ # do not broadcast across these + ["space", "id"], # centroid + ["space", "id"], # shape + ["id"], # confidence + ["id"], # label ], vectorize=True, # TODO: can I avoid vectorize? @@ -137,16 +142,17 @@ def fuse_ensemble_detections( def _validate_image_shape(image_shape) -> np.ndarray: """Validate and convert image shape to numpy array. - + Args: image_shape: Image dimensions as (width, height). Should be array-like with 2 elements. - + Returns: np.ndarray: Validated image shape as 1D array with 2 elements. - + Raises: ValueError: If image_shape cannot be converted to a valid shape. 
+ """ try: image_shape = np.asarray(image_shape) @@ -155,7 +161,7 @@ def _validate_image_shape(image_shape) -> np.ndarray: f"Cannot convert 'image_shape' to array: {e}. " "Expected format: (width, height) as tuple or array-like." ) from e - + # Flatten to handle (2,), (1,2) and (2,1) shapes image_shape = image_shape.flatten() if image_shape.shape != (2,): @@ -163,7 +169,7 @@ def _validate_image_shape(image_shape) -> np.ndarray: f"'image_shape' must have exactly 2 elements (width, height), " f"got shape {image_shape.shape}" ) - + return image_shape @@ -279,13 +285,14 @@ def _postprocess_single_image_detections( return centroid_da, shape_da, confidence_da, label_da + def _fuse_single_image_detections( fusion_function: Callable, - position, - shape, - confidence: np.ndarray, - label: np.ndarray, - image_width_height: np.ndarray, + position, + shape, + confidence: np.ndarray, + label: np.ndarray, + image_width_height: np.ndarray, max_n_detections: int, **fusion_kwargs: Unpack[_TypeFusionKwargs], # method-only kwargs ) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: @@ -380,6 +387,3 @@ def _postprocess_multi_image_fused_arrays( "confidence": confidence_da, "label": label_da, } - - - diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py index 53f24dce..75d8e907 100644 --- a/ethology/detectors/ensembles/utils.py +++ b/ethology/detectors/ensembles/utils.py @@ -1,7 +1,6 @@ """Utility functions for reshaping outputs of ensembles of detectors.""" import numpy as np -import xarray as xr def get_padding_width(array, max_n): @@ -24,5 +23,3 @@ def pad_to_max_first_dimension(list_arrays, fill_value=np.nan): for arr in list_arrays ] return list_arrays_padded - - diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index 71a9336f..dcb28d1c 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -10,7 +10,6 @@ import yaml from lightning import Trainer from matplotlib 
import pyplot as plt -from PIL import Image from torch.utils.data import DataLoader from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 @@ -245,7 +244,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Fuse detections across models with WBF # TODO: think whether joblib approach is more readable? image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] -ensemble_detections_ds.attrs['image_shape'] = image_width_height +ensemble_detections_ds.attrs["image_shape"] = image_width_height config_fusion = config["fusion"] @@ -253,7 +252,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # %% fused_detections_ds = fuse_ensemble_detections( ensemble_detections_ds, - fusion_method=config_fusion['method'], + fusion_method=config_fusion["method"], fusion_method_kwargs=config_fusion["method_kwargs"], # max_n_detections=config_fusion["max_n_detections"], # should be larger than expected maximum number of detections after fusion @@ -265,13 +264,13 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: fused_detections_nms_ds = fuse_ensemble_detections( ensemble_detections_ds, - fusion_method='soft_nms', + fusion_method="soft_nms", fusion_method_kwargs={ "iou_thr": config_fusion["method_kwargs"]["iou_thr"], - "sigma":0.5, - "thresh":0.001 + "sigma": 0.5, + "thresh": 0.001, }, - max_n_detections=500 + max_n_detections=500, ) fused_detections_ds = fused_detections_nms_ds From d989fa1b9cb33a83f3a7c690b735f335d15ecc01 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 19 Nov 2025 11:48:21 +0000 Subject: [PATCH 08/39] Validate supported detectors. 
Refactor model state dict fetching --- ethology/detectors/ensembles/models.py | 80 +++++++++++++++++++------- 1 file changed, 58 insertions(+), 22 deletions(-) diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 412b9fbc..4622a531 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -6,10 +6,10 @@ import numpy as np import torch import torch.nn as nn -import torchvision.models.detection as detection_models import xarray as xr import yaml from lightning import LightningModule +from torchvision.models import detection, get_model, list_models from ethology.detectors.ensembles.utils import pad_to_max_first_dimension @@ -32,39 +32,75 @@ def __init__(self, config_file: str | Path): with open(self.config_file) as f: self.config = yaml.safe_load(f) + # Run checks + self._validate_model_class() + # Load list of models (nn.ModuleList) - self.list_models = self.load_models() + self.list_models = self._load_models() + + @staticmethod + def _validate_model_class(model_class_str: str) -> None: + """Validate that the model is part of torchvision.models.detection.""" + valid_models = set(list_models(module=detection)) + if model_class_str not in valid_models: + valid_sorted = ", ".join(sorted(valid_models)) + raise ValueError( + f"'{model_class_str}' is not a supported detection model. 
" + f"Valid options: {valid_sorted}" + ) - def load_models(self) -> nn.ModuleList: + def _load_models(self) -> nn.ModuleList: """Load models from checkpoints.""" + # Get model architecture models_config = self.config["models"] - model_class = getattr(detection_models, models_config["model_class"]) + model = get_model( + models_config["model_class"], + **models_config.get("model_kwargs", {}), + ) + # Load weights list_models = [] for checkpoint_path in models_config["checkpoints"]: - # Get model architecture and weights - model = model_class(**models_config["model_kwargs"]) + # Get checkpoint checkpoint = torch.load(checkpoint_path, map_location=self.device) - state_dict = checkpoint["state_dict"] - # Load state dict into model - # PyTorch Lightning saves the model with a "model." - # prefix in the state_dict keys if you defined self.model - # in your LightningModule - we remove the prefix here. - if any(key.startswith("model.") for key in state_dict): - model_state_dict = { - key.replace("model.", "", 1): value - for key, value in state_dict.items() - if key.startswith("model.") - } - else: - model_state_dict = state_dict - model.load_state_dict(model_state_dict) - - # Append to list + # Load state dict + model_state_dict = self._get_model_state_dict(checkpoint) + model.load_state_dict(model_state_dict, strict=True) + list_models.append(model) + return nn.ModuleList(list_models) + @staticmethod + def _get_model_state_dict(checkpoint): + # Handle different checkpoint formats + if "state_dict" in checkpoint: + state_dict = checkpoint["state_dict"] + elif isinstance(checkpoint, dict): + # Checkpoint might be the state dict itself + state_dict = checkpoint + else: + raise ValueError( + "Checkpoint format not recognized. " + "Expected 'state_dict' key or dict of tensors." + ) + + # Load state dict into model + # PyTorch Lightning saves the model with a "model." 
+ # prefix in the state_dict keys if you defined self.model + # in your LightningModule - we remove the prefix here. + if any(key.startswith("model.") for key in state_dict): + model_state_dict = { + key.replace("model.", "", 1): value + for key, value in state_dict.items() + if key.startswith("model.") + } + else: + model_state_dict = state_dict + + return model_state_dict + def predict_step(self, batch, batch_idx): """Predict step for a single batch.""" # ------------------------------ From 62b45e5b883ddee27b54df3e8a0d7c0d4445f956 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:42:38 +0000 Subject: [PATCH 09/39] Rename to ValidBboxAnnotationsDataset. Add validator for bbox detections --- ethology/io/annotations/load_bboxes.py | 6 +- ethology/io/annotations/save_bboxes.py | 11 ++- ethology/io/annotations/validate.py | 39 +--------- ethology/io/detections/validate.py | 71 +++++++++++++++++++ ethology/io/validate.py | 39 ++++++++++ .../test_io_annotations/test_validators.py | 6 +- 6 files changed, 122 insertions(+), 50 deletions(-) create mode 100644 ethology/io/detections/validate.py create mode 100644 ethology/io/validate.py diff --git a/ethology/io/annotations/load_bboxes.py b/ethology/io/annotations/load_bboxes.py index d59d0abc..5812d346 100644 --- a/ethology/io/annotations/load_bboxes.py +++ b/ethology/io/annotations/load_bboxes.py @@ -11,15 +11,15 @@ from pandera.typing.pandas import DataFrame from ethology.io.annotations.validate import ( + ValidBboxAnnotationsDataset, ValidBboxesDataFrame, - ValidBboxesDataset, ValidCOCO, ValidVIA, - _check_output, ) +from ethology.io.validate import _check_output -@_check_output(ValidBboxesDataset) +@_check_output(ValidBboxAnnotationsDataset) def from_files( file_paths: Path | str | list[Path | str], format: Literal["VIA", "COCO"], diff --git a/ethology/io/annotations/save_bboxes.py b/ethology/io/annotations/save_bboxes.py index bf9e09ef..0dd4b0d4 100644 --- 
a/ethology/io/annotations/save_bboxes.py +++ b/ethology/io/annotations/save_bboxes.py @@ -12,15 +12,14 @@ from pandera.typing.pandas import DataFrame from ethology.io.annotations.validate import ( + ValidBboxAnnotationsDataset, ValidBboxesDataFrameCOCO, - ValidBboxesDataset, ValidCOCO, - _check_input, - _check_output, ) +from ethology.io.validate import _check_input, _check_output -@_check_input(validator=ValidBboxesDataset) +@_check_input(validator=ValidBboxAnnotationsDataset) @_check_output(validator=ValidCOCO) # check output is ethology importable def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path): """Save an ``ethology`` bounding box annotations dataset to a COCO file. @@ -56,7 +55,7 @@ def to_COCO_file(dataset: xr.Dataset, output_filepath: str | Path): return output_filepath -@_check_input(validator=ValidBboxesDataset) +@_check_input(validator=ValidBboxAnnotationsDataset) @pa.check_types def _to_COCO_exportable_df( ds: xr.Dataset, @@ -98,7 +97,7 @@ def _to_COCO_exportable_df( return df[cols_to_select] -@_check_input(validator=ValidBboxesDataset) +@_check_input(validator=ValidBboxAnnotationsDataset) def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame: """Get preliminary dataframe from a dataset of bounding boxes annotations. diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py index 2e00ab92..233e80d0 100644 --- a/ethology/io/annotations/validate.py +++ b/ethology/io/annotations/validate.py @@ -1,8 +1,6 @@ """Validators for annotation files and datasets.""" import json -from collections.abc import Callable -from functools import wraps from pathlib import Path import pandas as pd @@ -227,7 +225,7 @@ def _file_contains_unique_image_IDs(self, attribute, value): @define -class ValidBboxesDataset: +class ValidBboxAnnotationsDataset: """Class for valid ``ethology`` bounding box annotations datasets. 
It checks that the input dataset has: @@ -573,38 +571,3 @@ def check_idx_and_annotation_id(cls, df: pd.DataFrame) -> bool: """ return all(df.index == df["annotation_id"]) - - -def _check_output(validator: type): - """Return a decorator that validates the output of a function.""" - - def decorator(function: Callable) -> Callable: - @wraps(function) # to preserve function metadata - def wrapper(*args, **kwargs): - result = function(*args, **kwargs) - validator(result) - return result - - return wrapper - - return decorator - - -def _check_input(validator: type, input_index: int = 0): - """Return a decorator that validates a specific input of a function. - - By default, the first input is validated. If the input index is - larger than the number of inputs, no validation is performed. - """ - - def decorator(function: Callable) -> Callable: - @wraps(function) - def wrapper(*args, **kwargs): - if len(args) > input_index: - validator(args[input_index]) - result = function(*args, **kwargs) - return result - - return wrapper - - return decorator diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py new file mode 100644 index 00000000..67075c95 --- /dev/null +++ b/ethology/io/detections/validate.py @@ -0,0 +1,71 @@ +"""Validators for detection datasets.""" + +import xarray as xr +from attrs import define, field + + +@define +class ValidBboxDetectionsDataset: + """Class for valid ``ethology`` bounding box detections datasets. + + It checks that the input dataset has: + + - ``image_id``, ``space``, ``id`` as dimensions + - ``position``, ``shape`` and ``confidence`` as data variables + + Attributes + ---------- + dataset : xarray.Dataset + The xarray dataset to validate. + + Raises + ------ + TypeError + If the input is not an xarray Dataset. + ValueError + If the dataset is missing required data variables or dimensions. + + Notes + ----- + The dataset can have other data variables and dimensions, but only the + required ones are checked. 
+ + """ + + dataset: xr.Dataset = field() + + # Minimum requirements for annotations datasets holding bboxes + required_dims: set = field( + default={"image_id", "space", "id"}, + init=False, + ) + required_data_vars: set = field( + default={"position", "shape", "confidence"}, + init=False, + ) + + @dataset.validator + def _check_dataset_type(self, attribute, value): + """Ensure the input is an xarray Dataset.""" + if not isinstance(value, xr.Dataset): + raise TypeError( + f"Expected an xarray Dataset, but got {type(value)}." + ) + + @dataset.validator + def _check_required_data_variables(self, attribute, value): + """Ensure the dataset has all required data variables.""" + missing_vars = self.required_data_vars - set(value.data_vars) + if missing_vars: + raise ValueError( + f"Missing required data variables: {sorted(missing_vars)}" + ) + + @dataset.validator + def _check_required_dimensions(self, attribute, value): + """Ensure the dataset has all required dimensions.""" + missing_dims = self.required_dims - set(value.dims) + if missing_dims: + raise ValueError( + f"Missing required dimensions: {sorted(missing_dims)}" + ) diff --git a/ethology/io/validate.py b/ethology/io/validate.py new file mode 100644 index 00000000..ca515b19 --- /dev/null +++ b/ethology/io/validate.py @@ -0,0 +1,39 @@ +"""Utils for validating `ethology` objects.""" + +from collections.abc import Callable +from functools import wraps + + +def _check_output(validator: type): + """Return a decorator that validates the output of a function.""" + + def decorator(function: Callable) -> Callable: + @wraps(function) # to preserve function metadata + def wrapper(*args, **kwargs): + result = function(*args, **kwargs) + validator(result) + return result + + return wrapper + + return decorator + + +def _check_input(validator: type, input_index: int = 0): + """Return a decorator that validates a specific input of a function. + + By default, the first input is validated. 
If the input index is + larger than the number of inputs, no validation is performed. + """ + + def decorator(function: Callable) -> Callable: + @wraps(function) + def wrapper(*args, **kwargs): + if len(args) > input_index: + validator(args[input_index]) + result = function(*args, **kwargs) + return result + + return wrapper + + return decorator diff --git a/tests/test_unit/test_io_annotations/test_validators.py b/tests/test_unit/test_io_annotations/test_validators.py index d054da27..3ae8e417 100644 --- a/tests/test_unit/test_io_annotations/test_validators.py +++ b/tests/test_unit/test_io_annotations/test_validators.py @@ -11,7 +11,7 @@ _extract_properties_keys, ) from ethology.io.annotations.validate import ( - ValidBboxesDataset, + ValidBboxAnnotationsDataset, ValidCOCO, ValidVIA, ) @@ -557,7 +557,7 @@ def test_valid_bboxes_dataset_validation( expected_error_message: str, request: pytest.FixtureRequest, ): - """Test ValidBboxesDataset validation with various input scenarios.""" + """Test bbox annotations dataset validator with various input scenarios.""" # Get dataset to validate if isinstance(sample_dataset, str): dataset = request.getfixturevalue(sample_dataset) @@ -566,7 +566,7 @@ def test_valid_bboxes_dataset_validation( # Run validation and check exception with expected_exception as excinfo: - validator = ValidBboxesDataset(dataset=dataset) + validator = ValidBboxAnnotationsDataset(dataset=dataset) if excinfo: error_msg = str(excinfo.value) From bc43d431d661f4fd09a37fbbd1a727e329dc9ec8 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 19 Nov 2025 13:53:29 +0000 Subject: [PATCH 10/39] Add decorators to validate bbox detections dataset --- ethology/detectors/ensembles/fusion.py | 21 ++++++++++++++++++--- ethology/detectors/ensembles/models.py | 9 ++++++--- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 913f34c0..4d5ee7fc 
100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -4,10 +4,18 @@ from functools import partial from typing import Literal, TypedDict, Unpack +import ensemble_boxes +from collections.abc import Callable +from functools import partial +from typing import Literal, TypedDict, Unpack + import ensemble_boxes import numpy as np import xarray as xr +from ethology.io.detections.validate import ValidBboxDetectionsDataset +from ethology.io.validate import _check_output + VALID_FUSION_METHODS = { "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion, "nms": ensemble_boxes.nms, @@ -16,17 +24,20 @@ } + class _TypeFusionKwargs(TypedDict, total=False): """Type hints for fusion method kwargs. Parameters for methods as described in the ensemble_boxes documentation. See https://github.com/ZFTurbo/Weighted-Boxes-Fusion + Parameters ---------- weights: list[float] Weights for each model. iou_thr: float + IoU threshold for detections to be considered a true positive IoU threshold for detections to be considered a true positive during fusion. skip_box_thr: float @@ -42,9 +53,10 @@ class _TypeFusionKwargs(TypedDict, total=False): - 'absent_model_aware_avg': weighted average that takes into account the absent model. allows_overflow: bool Whether to allow the confidence score of the fused detections to exceed 1. 
- + """ + weights: list[float] | None iou_thr: float skip_box_thr: float @@ -54,14 +66,17 @@ class _TypeFusionKwargs(TypedDict, total=False): allows_overflow: bool -# TODO: -# @decorator-that-checks-output-is-a-detections-dataset +@_check_output(ValidBboxDetectionsDataset) def fuse_ensemble_detections( ensemble_detections_ds: xr.Dataset, fusion_method: Literal[ "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted" ], fusion_method_kwargs: dict | None = None, + fusion_method: Literal[ + "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted" + ], + fusion_method_kwargs: dict | None = None, max_n_detections: int = 500, ) -> xr.Dataset: """Fuse ensemble detections across models using WBF.""" diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 4622a531..652d4876 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -12,6 +12,8 @@ from torchvision.models import detection, get_model, list_models from ethology.detectors.ensembles.utils import pad_to_max_first_dimension +from ethology.io.detections.validate import ValidBboxDetectionsDataset +from ethology.io.validate import _check_output class EnsembleDetector(LightningModule): @@ -123,9 +125,10 @@ def predict_step(self, batch, batch_idx): return raw_prediction_dicts_per_sample - # TODO: - # @decorator-that-checks-output-is-a-detections-dataset - def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: + @_check_output(ValidBboxDetectionsDataset) + def format_predictions( + self, attrs: dict | None = None + ) -> xr.Dataset: """Format as ethology detections dataset with model axis.""" # Get results from trainer raw_predictions_per_model = self.trainer.predict_loop.predictions From 1564e0b2f402f4d227d2af43d71f2cbf0838f613 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:25:46 +0000 Subject: [PATCH 11/39] Add abstract base class for dataset 
validators --- ethology/io/annotations/validate.py | 38 +++---------- ethology/io/detections/validate.py | 39 +++----------- ethology/io/validate.py | 82 +++++++++++++++++++++++++++++ 3 files changed, 97 insertions(+), 62 deletions(-) diff --git a/ethology/io/annotations/validate.py b/ethology/io/annotations/validate.py index 233e80d0..04b81a60 100644 --- a/ethology/io/annotations/validate.py +++ b/ethology/io/annotations/validate.py @@ -5,7 +5,6 @@ import pandas as pd import pandera.pandas as pa -import xarray as xr from attrs import define, field from pandera.typing import Index @@ -15,6 +14,7 @@ _check_required_keys_in_dict, _get_default_schema, ) +from ethology.io.validate import ValidDataset @define @@ -225,7 +225,7 @@ def _file_contains_unique_image_IDs(self, attribute, value): @define -class ValidBboxAnnotationsDataset: +class ValidBboxAnnotationsDataset(ValidDataset): """Class for valid ``ethology`` bounding box annotations datasets. It checks that the input dataset has: @@ -237,6 +237,10 @@ class ValidBboxAnnotationsDataset: ---------- dataset : xarray.Dataset The xarray dataset to validate. + required_dims : set + Set of required dimension names. + required_data_vars : set + Set of required data variable names. Raises ------ @@ -252,9 +256,7 @@ class ValidBboxAnnotationsDataset: """ - dataset: xr.Dataset = field() - - # Minimum requirements for annotations datasets holding bboxes + # Minimum requirements for a bbox dataset holding detections required_dims: set = field( default={"image_id", "space", "id"}, init=False, @@ -264,32 +266,6 @@ class ValidBboxAnnotationsDataset: init=False, ) - @dataset.validator - def _check_dataset_type(self, attribute, value): - """Ensure the input is an xarray Dataset.""" - if not isinstance(value, xr.Dataset): - raise TypeError( - f"Expected an xarray Dataset, but got {type(value)}." 
- ) - - @dataset.validator - def _check_required_data_variables(self, attribute, value): - """Ensure the dataset has all required data variables.""" - missing_vars = self.required_data_vars - set(value.data_vars) - if missing_vars: - raise ValueError( - f"Missing required data variables: {sorted(missing_vars)}" - ) - - @dataset.validator - def _check_required_dimensions(self, attribute, value): - """Ensure the dataset has all required dimensions.""" - missing_dims = self.required_dims - set(value.dims) - if missing_dims: - raise ValueError( - f"Missing required dimensions: {sorted(missing_dims)}" - ) - class ValidBboxesDataFrame(pa.DataFrameModel): """Class for valid bounding boxes intermediate dataframes. diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py index 67075c95..7ef6285d 100644 --- a/ethology/io/detections/validate.py +++ b/ethology/io/detections/validate.py @@ -1,11 +1,12 @@ """Validators for detection datasets.""" -import xarray as xr from attrs import define, field +from ethology.io.validate import ValidDataset + @define -class ValidBboxDetectionsDataset: +class ValidBboxDetectionsDataset(ValidDataset): """Class for valid ``ethology`` bounding box detections datasets. It checks that the input dataset has: @@ -17,6 +18,10 @@ class ValidBboxDetectionsDataset: ---------- dataset : xarray.Dataset The xarray dataset to validate. + required_dims : set + Set of required dimension names. + required_data_vars : set + Set of required data variable names. 
Raises ------ @@ -32,9 +37,7 @@ class ValidBboxDetectionsDataset: """ - dataset: xr.Dataset = field() - - # Minimum requirements for annotations datasets holding bboxes + # Minimum requirements for a bbox dataset holding detections required_dims: set = field( default={"image_id", "space", "id"}, init=False, @@ -43,29 +46,3 @@ class ValidBboxDetectionsDataset: default={"position", "shape", "confidence"}, init=False, ) - - @dataset.validator - def _check_dataset_type(self, attribute, value): - """Ensure the input is an xarray Dataset.""" - if not isinstance(value, xr.Dataset): - raise TypeError( - f"Expected an xarray Dataset, but got {type(value)}." - ) - - @dataset.validator - def _check_required_data_variables(self, attribute, value): - """Ensure the dataset has all required data variables.""" - missing_vars = self.required_data_vars - set(value.data_vars) - if missing_vars: - raise ValueError( - f"Missing required data variables: {sorted(missing_vars)}" - ) - - @dataset.validator - def _check_required_dimensions(self, attribute, value): - """Ensure the dataset has all required dimensions.""" - missing_dims = self.required_dims - set(value.dims) - if missing_dims: - raise ValueError( - f"Missing required dimensions: {sorted(missing_dims)}" - ) diff --git a/ethology/io/validate.py b/ethology/io/validate.py index ca515b19..22c215f9 100644 --- a/ethology/io/validate.py +++ b/ethology/io/validate.py @@ -1,8 +1,90 @@ """Utils for validating `ethology` objects.""" +from abc import ABC, abstractmethod from collections.abc import Callable from functools import wraps +import xarray as xr +from attrs import define, field + + +@define +class ValidDataset(ABC): + """An abstract base class for valid ``ethology`` datasets. + + It checks that the input dataset has: + + - required dimensions + - required data variables + + Subclasses must define ``required_dims`` and ``required_data_vars`` + attributes. 
+ + Attributes + ---------- + dataset : xarray.Dataset + The xarray dataset to validate. + required_dims : set + Set of required dimension names (defined by subclasses). + required_data_vars : set + Set of required data variable names (defined by subclasses). + + Raises + ------ + TypeError + If the input is not an xarray Dataset. + ValueError + If the dataset is missing required data variables or dimensions. + + Notes + ----- + The dataset can have other data variables and dimensions, but only the + required ones are checked. + + """ + + dataset: xr.Dataset = field() + + # Subclasses should override these abstract properties + @property + @abstractmethod + def required_dims(self) -> set: + """Subclasses must provide a required_dims property.""" + pass + + @property + @abstractmethod + def required_data_vars(self) -> set: + """Subclasses must provide a required_data_vars property.""" + pass + + # Validators + @dataset.validator + def _check_dataset_type(self, attribute, value): + """Ensure the input is an xarray Dataset.""" + if not isinstance(value, xr.Dataset): + raise TypeError( + f"Expected an xarray Dataset, but got {type(value)}." 
+ ) + + @dataset.validator + def _check_required_data_variables(self, attribute, value): + """Ensure the dataset has all required data variables.""" + missing_vars = self.required_data_vars - set(value.data_vars) + if missing_vars: + raise ValueError( + f"Missing required data variables: {sorted(missing_vars)}" + ) + + @dataset.validator + def _check_required_dimensions(self, attribute, value): + """Ensure the dataset has all required dimensions.""" + missing_dims = self.required_dims - set(value.dims) + if missing_dims: + raise ValueError( + f"Missing required dimensions: {sorted(missing_dims)}" + ) + def _check_output(validator: type): """Return a decorator that validates the output of a function.""" From 329160629a7144df7c24e1389e74382bdbbd8db6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 19 Nov 2025 19:40:59 +0000 Subject: [PATCH 12/39] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ethology/detectors/ensembles/fusion.py | 2 +- ethology/detectors/ensembles/models.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 4d5ee7fc..f7cf8e61 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -53,7 +53,7 @@ class _TypeFusionKwargs(TypedDict, total=False): - 'absent_model_aware_avg': weighted average that takes into account the absent model. allows_overflow: bool Whether to allow the confidence score of the fused detections to exceed 1. 
- + """ diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 652d4876..92869d47 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -126,9 +126,7 @@ def predict_step(self, batch, batch_idx): return raw_prediction_dicts_per_sample @_check_output(ValidBboxDetectionsDataset) - def format_predictions( - self, attrs: dict | None = None - ) -> xr.Dataset: + def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: """Format as ethology detections dataset with model axis.""" # Get results from trainer raw_predictions_per_model = self.trainer.predict_loop.predictions From c384340c8d47c7fdc3856ff623413407ae53939b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 11:57:28 +0000 Subject: [PATCH 13/39] pre-commit fixes --- ethology/detectors/ensembles/fusion.py | 39 ++++++++++++-------------- ethology/detectors/ensembles/models.py | 13 +++++++-- ethology/detectors/ensembles/utils.py | 2 +- 3 files changed, 29 insertions(+), 25 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index f7cf8e61..a48914ac 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -4,11 +4,6 @@ from functools import partial from typing import Literal, TypedDict, Unpack -import ensemble_boxes -from collections.abc import Callable -from functools import partial -from typing import Literal, TypedDict, Unpack - import ensemble_boxes import numpy as np import xarray as xr @@ -24,7 +19,6 @@ } - class _TypeFusionKwargs(TypedDict, total=False): """Type hints for fusion method kwargs. @@ -48,15 +42,17 @@ class _TypeFusionKwargs(TypedDict, total=False): Threshold for boxes to keep after soft NMS. conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"] Method to compute the confidence score of the fused detections. 
+ - "avg": Average confidence score of the fused detections (default). - 'box_and_model_avg': box and model wise hybrid weighted average. - - 'absent_model_aware_avg': weighted average that takes into account the absent model. + - 'absent_model_aware_avg': weighted average that takes into account + the absent model. allows_overflow: bool - Whether to allow the confidence score of the fused detections to exceed 1. + Whether to allow the confidence score of the fused detections to + exceed 1. """ - weights: list[float] | None iou_thr: float skip_box_thr: float @@ -73,10 +69,6 @@ def fuse_ensemble_detections( "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted" ], fusion_method_kwargs: dict | None = None, - fusion_method: Literal[ - "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted" - ], - fusion_method_kwargs: dict | None = None, max_n_detections: int = 500, ) -> xr.Dataset: """Fuse ensemble detections across models using WBF.""" @@ -84,9 +76,9 @@ def fuse_ensemble_detections( image_shape = ensemble_detections_ds.attrs.get("image_shape") if image_shape is None: raise KeyError( - "Required attribute 'image_shape' not found in the dataset attributes. " - "Please ensure the dataset has 'image_shape' (width, height in pixels) " - "in its attributes." + "Required attribute 'image_shape' not found in the dataset " + "attributes. Please ensure the dataset has 'image_shape' " + "(width, height in pixels) in its attributes." 
) else: image_width_height = _validate_image_shape(image_shape) @@ -194,7 +186,7 @@ def _preprocess_single_image_detections( confidence: xr.DataArray, label: xr.DataArray, image_width_height: np.ndarray, -) -> list[np.ndarray]: +) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]: """Prepare ensemble detections on a single image for fusion.""" # Prepare boxes array --> position, shape arrays to x1y1x2y normalised bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None] @@ -252,7 +244,10 @@ def _postprocess_single_image_detections( image_width_height, max_n_detections, ): - """Unnormalise, pad and format fused single-image detections as data arrays.""" + """Postprocess fused single-image detections as dataarrays. + + Unnormalise, pad and format as data arrays. + """ # Undo boxes x1y1 x2y2 normalization ensemble_x1y1_x2y2 = ensemble_x1y1_x2y2_norm * np.tile( image_width_height, (1, 2) @@ -272,9 +267,11 @@ def _postprocess_single_image_detections( if ensemble_x1y2_x2y2_scores_labels.shape[0] > max_n_detections: raise ValueError( "Insufficient padding provided. " - f"The estimated maximum number of detections per image was set to {max_n_detections}, " - f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were found in one of the images " - "after fusion. Please increase the maximum number of detections per image." + "The estimated maximum number of detections per image was set to " + f"{max_n_detections}, " + f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were " + "found in one of the images after fusion. Please increase the " + "maximum number of detections per image." 
) # Pad combined array to max_n_detections diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 92869d47..c21ac63b 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -27,6 +27,7 @@ class EnsembleDetector(LightningModule): """ def __init__(self, config_file: str | Path): + """Initialise ensemble of detectors.""" super().__init__() # Load config @@ -35,7 +36,7 @@ def __init__(self, config_file: str | Path): self.config = yaml.safe_load(f) # Run checks - self._validate_model_class() + self._validate_model_class(self.config["models"]["model_class"]) # Load list of models (nn.ModuleList) self.list_models = self._load_models() @@ -137,7 +138,11 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: ) # [sample][model] # Parse output from dicts - output_per_sample = {"boxes": [], "scores": [], "labels": []} + output_per_sample: dict[str, list] = { + "boxes": [], + "scores": [], + "labels": [], + } for ky in output_per_sample: output_per_sample[ky] = [ [sample[m][ky] for m in range(len(self.list_models))] @@ -146,7 +151,9 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: # Pad across models and across image_ids fill_value = {"boxes": np.nan, "scores": np.nan, "labels": -1} - output_per_sample_padded = {ky: [] for ky in output_per_sample} + output_per_sample_padded: dict[str, list] = { + ky: [] for ky in output_per_sample + } for ky in output_per_sample_padded: output_per_sample_padded[ky] = pad_to_max_first_dimension( [ diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py index 75d8e907..0ab1e2f8 100644 --- a/ethology/detectors/ensembles/utils.py +++ b/ethology/detectors/ensembles/utils.py @@ -11,7 +11,7 @@ def get_padding_width(array, max_n): def pad_to_max_first_dimension(list_arrays, fill_value=np.nan): - """Pad arrays to maximum number across all arrays in the first dimension.""" + """Pad arrays 
in list to maximum size of their first dimension.""" max_n_detections = max(array.shape[0] for array in list_arrays) list_arrays_padded = [ np.pad( From 7c076a247463f65b606ca5b52fe483b336c71b98 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 12:05:30 +0000 Subject: [PATCH 14/39] Small comments --- ethology/detectors/ensembles/models.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index c21ac63b..a7e8ea8d 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -108,8 +108,6 @@ def predict_step(self, batch, batch_idx): """Predict step for a single batch.""" # ------------------------------ # Run all models in ensemble in GPU - # TODO: can I vectorize this? - # https://docs.pytorch.org/tutorials/intermediate/ensembling.html images_batch, _annotations_batch = batch raw_prediction_dicts_per_model = [ model(images_batch) for model in self.list_models From a610e2036ae486c3097482a216a267b3ac9a5393 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:04:09 +0000 Subject: [PATCH 15/39] Fix weights loading --- ethology/detectors/ensembles/models.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index a7e8ea8d..7ef8311f 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -54,12 +54,8 @@ def _validate_model_class(model_class_str: str) -> None: def _load_models(self) -> nn.ModuleList: """Load models from checkpoints.""" - # Get model architecture + # Get model config models_config = self.config["models"] - model = get_model( - models_config["model_class"], - **models_config.get("model_kwargs", {}), - ) # Load weights list_models = [] @@ -67,10 +63,15 @@ def _load_models(self) -> 
nn.ModuleList: # Get checkpoint checkpoint = torch.load(checkpoint_path, map_location=self.device) - # Load state dict + # Instantiate model with ckpt weights + model = get_model( + models_config["model_class"], + **models_config.get("model_kwargs", {}), + ) model_state_dict = self._get_model_state_dict(checkpoint) model.load_state_dict(model_state_dict, strict=True) + # Append model to list list_models.append(model) return nn.ModuleList(list_models) From c6c05eefa3997dd9bb75f4501aec916276ff4a8e Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:11:03 +0000 Subject: [PATCH 16/39] Filter low confidence predictions when evaluating single models in the ensemble --- examples/ensemble_of_detectors.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index dcb28d1c..ea81dd8f 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -245,7 +245,6 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # TODO: think whether joblib approach is more readable? 
image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] ensemble_detections_ds.attrs["image_shape"] = image_width_height - config_fusion = config["fusion"] @@ -324,8 +323,14 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Evaluate single models list_detections_ds_eval = [] for k in range(ensemble_detections_ds.sizes["model"]): + # filter low confidence detections (for a fairer comparison) + detections_one_model = ensemble_detections_ds.where( + ensemble_detections_ds.confidence >= confidence_threshold_post_fusion + ).sel(model=k) + + # evaluate detections_ds, _ = compute_precision_recall_ds( - pred_bboxes_ds=ensemble_detections_ds.sel(model=k), + pred_bboxes_ds=detections_one_model, gt_bboxes_ds=gt_bboxes_ds, iou_threshold=iou_threshold_tp, ) From db5910e5693843ab49d7ae77094622f0a77d45d5 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 17:20:57 +0000 Subject: [PATCH 17/39] Rename variable --- ethology/detectors/ensembles/fusion.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index a48914ac..4f46b3a4 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -254,32 +254,30 @@ def _postprocess_single_image_detections( ) # Combine x1y1, x2y2, scores and labels in one array - ensemble_x1y2_x2y2_scores_labels = np.c_[ - ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels - ] + ensemble_data = np.c_[ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels] # Remove rows with nan coordinates - ensemble_x1y2_x2y2_scores_labels = ensemble_x1y2_x2y2_scores_labels[ + ensemble_data = ensemble_data[ ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1) ] # Check padding - if ensemble_x1y2_x2y2_scores_labels.shape[0] > max_n_detections: + if ensemble_data.shape[0] > max_n_detections: raise ValueError( "Insufficient padding provided. 
" "The estimated maximum number of detections per image was set to " f"{max_n_detections}, " - f"but {ensemble_x1y2_x2y2_scores_labels.shape[0]} detections were " + f"but {ensemble_data.shape[0]} detections were " "found in one of the images after fusion. Please increase the " "maximum number of detections per image." ) # Pad combined array to max_n_detections # (this is required to concatenate across image_ids) - ensemble_x1y2_x2y2_scores_labels = np.pad( - ensemble_x1y2_x2y2_scores_labels, + ensemble_data = np.pad( + ensemble_data, ( - (0, max_n_detections - ensemble_x1y2_x2y2_scores_labels.shape[0]), + (0, max_n_detections - ensemble_data.shape[0]), (0, 0), ), "constant", @@ -289,9 +287,9 @@ def _postprocess_single_image_detections( # Format output as xarray dataarrays centroid_da, shape_da, confidence_da, label_da = ( _single_image_detections_as_dataarrays( - ensemble_x1y2_x2y2_scores_labels[:, 0:4], - ensemble_x1y2_x2y2_scores_labels[:, 4], - ensemble_x1y2_x2y2_scores_labels[:, 5], + ensemble_data[:, 0:4], + ensemble_data[:, 4], + ensemble_data[:, 5], ) ) From ef689897c7acae90d645a2bd1fc8a3fcd58da0e9 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:03:20 +0000 Subject: [PATCH 18/39] Compute upper bound for max number of detections after fusion automatically --- ethology/detectors/ensembles/fusion.py | 48 +++++++++++++++++++------- 1 file changed, 36 insertions(+), 12 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 4f46b3a4..8f29985a 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -18,6 +18,10 @@ "non_maxium_weighted": ensemble_boxes.non_maximum_weighted, } +fusion_method_type = Literal[ + "weighted_boxes_fusion", "nms", "soft_nms", "non_maxium_weighted" +] + class _TypeFusionKwargs(TypedDict, total=False): """Type hints for fusion method kwargs. 
@@ -63,15 +67,22 @@ class _TypeFusionKwargs(TypedDict, total=False): @_check_output(ValidBboxDetectionsDataset) -def fuse_ensemble_detections( +def fuse_detections( ensemble_detections_ds: xr.Dataset, - fusion_method: Literal[ - "weighted_boxes_fusion", "nms", "soft_nms", "non_maximum_weighted" - ], + fusion_method: fusion_method_type, fusion_method_kwargs: dict | None = None, - max_n_detections: int = 500, + max_n_detections: int | None = None, ) -> xr.Dataset: - """Fuse ensemble detections across models using WBF.""" + """Fuse ensemble detections across models using the selected method. + + You can set a max_n_detections if an upper bound is known a priori to + reduce memory usage. + + """ + # Check if input dataset has 'model' dimension + if "model" not in ensemble_detections_ds.dims: + raise ValueError("Input dataset must have 'model' dimension. ") + # Check if image_width_height defined in dataset image_shape = ensemble_detections_ds.attrs.get("image_shape") if image_shape is None: @@ -83,6 +94,10 @@ def fuse_ensemble_detections( else: image_width_height = _validate_image_shape(image_shape) + # Compute upper bound of max_n_detections + if not max_n_detections: + max_n_detections = _estimate_max_n_detections(ensemble_detections_ds) + # Build single-image partial fusion function for the selected method if fusion_method not in VALID_FUSION_METHODS: raise ValueError( @@ -94,10 +109,6 @@ def fuse_ensemble_detections( _fuse_single_image_detections, fusion_function ) - # Prepare kwargs for fusion function - if not fusion_method_kwargs: - fusion_method_kwargs = {} - # Run fusion across image_id using apply_ufunc centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( xr.apply_ufunc( @@ -109,7 +120,7 @@ def fuse_ensemble_detections( kwargs={ "image_width_height": image_width_height, "max_n_detections": max_n_detections, - **fusion_method_kwargs, + **(fusion_method_kwargs if fusion_method_kwargs else {}), }, input_core_dims=[ # do not broadcast across these ["space",
"id", "model"], # centroid @@ -132,7 +143,7 @@ def fuse_ensemble_detections( ) ) - # Post process data arrays + # Postprocess data arrays fused_data_arrays = { "position": centroid_fused_da, "shape": shape_fused_da, @@ -180,6 +191,19 @@ def _validate_image_shape(image_shape) -> np.ndarray: return image_shape +def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int: + """Get upper bound for maximum number of boxes per image after fusion.""" + detections_w_non_nan_position = ( + ensemble_detections_ds.position.notnull().all(dim="space") + ) # True if non-nan x and y + return ( + detections_w_non_nan_position.sum(dim="id") + .max(dim="image_id") + .sum() + .item() + ) + + def _preprocess_single_image_detections( position: xr.DataArray, shape: xr.DataArray, From bce1ee701e18fd612f0ee82b69492e958b931184 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:11:50 +0000 Subject: [PATCH 19/39] Use new validators module --- ethology/detectors/ensembles/fusion.py | 4 +- ethology/detectors/ensembles/models.py | 4 +- ethology/detectors/evaluate.py | 2 +- ethology/io/detections/validate.py | 48 ---------- ethology/io/validate.py | 121 ------------------------- examples/ensemble_of_detectors.py | 41 +++++---- 6 files changed, 27 insertions(+), 193 deletions(-) delete mode 100644 ethology/io/detections/validate.py delete mode 100644 ethology/io/validate.py diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 8f29985a..415f9591 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -8,8 +8,8 @@ import numpy as np import xarray as xr -from ethology.io.detections.validate import ValidBboxDetectionsDataset -from ethology.io.validate import _check_output +from ethology.validators.detections import ValidBboxDetectionsDataset +from ethology.validators.utils import _check_output VALID_FUSION_METHODS = { "weighted_boxes_fusion": 
ensemble_boxes.weighted_boxes_fusion, diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 7ef8311f..2d339ba2 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -12,8 +12,8 @@ from torchvision.models import detection, get_model, list_models from ethology.detectors.ensembles.utils import pad_to_max_first_dimension -from ethology.io.detections.validate import ValidBboxDetectionsDataset -from ethology.io.validate import _check_output +from ethology.validators.detections import ValidBboxDetectionsDataset +from ethology.validators.utils import _check_output class EnsembleDetector(LightningModule): diff --git a/ethology/detectors/evaluate.py b/ethology/detectors/evaluate.py index f991420c..1ae34104 100644 --- a/ethology/detectors/evaluate.py +++ b/ethology/detectors/evaluate.py @@ -81,7 +81,7 @@ def evaluate_detections_hungarian_ds( def _evaluate_detections_hungarian_arrays( pred_bboxes: np.ndarray, gt_bboxes: np.ndarray, iou_threshold: float -) -> tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: +) -> tuple[np.ndarray, ...]: """Compute true positives, false positives, and missed detections. Uses Hungarian algorithm for matching and takes arrays of bboxes as input diff --git a/ethology/io/detections/validate.py b/ethology/io/detections/validate.py deleted file mode 100644 index 7ef6285d..00000000 --- a/ethology/io/detections/validate.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Validators for detection datasets.""" - -from attrs import define, field - -from ethology.io.validate import ValidDataset - - -@define -class ValidBboxDetectionsDataset(ValidDataset): - """Class for valid ``ethology`` bounding box detections datasets. - - It checks that the input dataset has: - - - ``image_id``, ``space``, ``id`` as dimensions - - ``position``, ``shape`` and ``confidence`` as data variables - - Attributes - ---------- - dataset : xarray.Dataset - The xarray dataset to validate. 
- required_dims : set - Set of required dimension names. - required_data_vars : set - Set of required data variable names. - - Raises - ------ - TypeError - If the input is not an xarray Dataset. - ValueError - If the dataset is missing required data variables or dimensions. - - Notes - ----- - The dataset can have other data variables and dimensions, but only the - required ones are checked. - - """ - - # Minimum requirements for a bbox dataset holding detections - required_dims: set = field( - default={"image_id", "space", "id"}, - init=False, - ) - required_data_vars: set = field( - default={"position", "shape", "confidence"}, - init=False, - ) diff --git a/ethology/io/validate.py b/ethology/io/validate.py deleted file mode 100644 index 22c215f9..00000000 --- a/ethology/io/validate.py +++ /dev/null @@ -1,121 +0,0 @@ -"""Utils for validating `ethology` objects.""" - -from abc import ABC, abstractmethod -from collections.abc import Callable -from functools import wraps - -import xarray as xr -from attrs import define, field - - -@define -class ValidDataset(ABC): - """An abstract base class for valid ``ethology`` datasets. - - It checks that the input dataset has: - - - required dimensions - - required data variables - - Subclasses must define ``required_dims`` and ``required_data_vars`` - attributes. - - Attributes - ---------- - dataset : xarray.Dataset - The xarray dataset to validate. - required_dims : set - Set of required dimension names (defined by subclasses). - required_data_vars : set - Set of required data variable names (defined by subclasses). - - Raises - ------ - TypeError - If the input is not an xarray Dataset. - ValueError - If the dataset is missing required data variables or dimensions. - - Notes - ----- - The dataset can have other data variables and dimensions, but only the - required ones are checked. 
- - """ - - dataset: xr.Dataset = field() - - # Subclasses should override these abstract properties - @property - @abstractmethod - def required_dims(self) -> set: - """Subclasses must provide a required_dims property.""" - pass - - @property - @abstractmethod - def required_data_vars(self) -> set: - """Subclasses must provide a required_data_vars property.""" - pass - - # Validators - @dataset.validator - def _check_dataset_type(self, attribute, value): - """Ensure the input is an xarray Dataset.""" - if not isinstance(value, xr.Dataset): - raise TypeError( - f"Expected an xarray Dataset, but got {type(value)}." - ) - - @dataset.validator - def _check_required_data_variables(self, attribute, value): - """Ensure the dataset has all required data variables.""" - missing_vars = self.required_data_vars - set(value.data_vars) - if missing_vars: - raise ValueError( - f"Missing required data variables: {sorted(missing_vars)}" - ) - - @dataset.validator - def _check_required_dimensions(self, attribute, value): - """Ensure the dataset has all required dimensions.""" - missing_dims = self.required_dims - set(value.dims) - if missing_dims: - raise ValueError( - f"Missing required dimensions: {sorted(missing_dims)}" - ) - - -def _check_output(validator: type): - """Return a decorator that validates the output of a function.""" - - def decorator(function: Callable) -> Callable: - @wraps(function) # to preserve function metadata - def wrapper(*args, **kwargs): - result = function(*args, **kwargs) - validator(result) - return result - - return wrapper - - return decorator - - -def _check_input(validator: type, input_index: int = 0): - """Return a decorator that validates a specific input of a function. - - By default, the first input is validated. If the input index is - larger than the number of inputs, no validation is performed. 
- """ - - def decorator(function: Callable) -> Callable: - @wraps(function) - def wrapper(*args, **kwargs): - if len(args) > input_index: - validator(args[input_index]) - result = function(*args, **kwargs) - return result - - return wrapper - - return decorator diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index ea81dd8f..11e9e862 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -1,3 +1,4 @@ +"""Evaluating ensemble of trained detectors.""" # %% # imports @@ -13,7 +14,7 @@ from torch.utils.data import DataLoader from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 -from ethology.detectors.ensembles.fusion import fuse_ensemble_detections +from ethology.detectors.ensembles.fusion import fuse_detections from ethology.detectors.ensembles.models import EnsembleDetector from ethology.detectors.evaluate import compute_precision_recall_ds from ethology.io.annotations import load_bboxes @@ -165,11 +166,13 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: "fusion": { "method": "weighted_boxes_fusion", # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted" - "method_kwargs": { # arguments as in ensemble_boxes.weighted_boxes_fusion + "method_kwargs": { + # arguments as in ensemble_boxes.weighted_boxes_fusion "iou_thr": 0.5, # iou threshold for the ensemble "skip_box_thr": 0.0001, }, - # "n_jobs": -1, # workers for joblib.Parallel, n_workers should be <= number of CPU cores + # "n_jobs": -1, # workers for joblib.Parallel, + # n_workers should be <= number of CPU cores # "confidence_threshold_post_fusion": 0.0, "max_n_detections": 300, }, @@ -245,11 +248,10 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # TODO: think whether joblib approach is more readable? 
image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] ensemble_detections_ds.attrs["image_shape"] = image_width_height -config_fusion = config["fusion"] +config_fusion: dict = config["fusion"] -# %% -fused_detections_ds = fuse_ensemble_detections( +fused_detections_ds = fuse_detections( ensemble_detections_ds, fusion_method=config_fusion["method"], fusion_method_kwargs=config_fusion["method_kwargs"], @@ -261,18 +263,18 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Fuse detections across models with NMS -fused_detections_nms_ds = fuse_ensemble_detections( - ensemble_detections_ds, - fusion_method="soft_nms", - fusion_method_kwargs={ - "iou_thr": config_fusion["method_kwargs"]["iou_thr"], - "sigma": 0.5, - "thresh": 0.001, - }, - max_n_detections=500, -) - -fused_detections_ds = fused_detections_nms_ds +# fused_detections_nms_ds = fuse_ensemble_detections( +# ensemble_detections_ds, +# fusion_method="soft_nms", +# fusion_method_kwargs={ +# "iou_thr": config_fusion["method_kwargs"]["iou_thr"], +# "sigma": 0.5, +# "thresh": 0.001, +# }, +# max_n_detections=500, +# ) + +# fused_detections_ds = fused_detections_nms_ds # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Remove low confidence detections confidence_threshold_post_fusion = 0.5 @@ -294,7 +296,8 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: iou_threshold=iou_threshold_tp, ) -# All models on full August dataset, without removing low confidence detections: +# All models on full August dataset, without removing low +# confidence detections: # confidence_threshold_post_fusion = 0.0 # Precision: 0.5920 # Recall: 0.8455 From c26af244d63ea50d5e88a4b4a980f2a2509ed66b Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Fri, 21 Nov 2025 18:21:04 +0000 Subject: [PATCH 20/39] Add an ensemble detections dataset validator --- ethology/detectors/ensembles/fusion.py | 13 +++--- 
ethology/detectors/ensembles/models.py | 4 +- ethology/validators/detections.py | 60 ++++++++++++++++++++++++++ 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 415f9591..d04c5d48 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -8,8 +8,11 @@ import numpy as np import xarray as xr -from ethology.validators.detections import ValidBboxDetectionsDataset -from ethology.validators.utils import _check_output +from ethology.validators.detections import ( + ValidBboxDetectionsDataset, + ValidBboxDetectionsEnsembleDataset, +) +from ethology.validators.utils import _check_input, _check_output VALID_FUSION_METHODS = { "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion, @@ -66,6 +69,7 @@ class _TypeFusionKwargs(TypedDict, total=False): allows_overflow: bool +@_check_input(ValidBboxDetectionsEnsembleDataset) @_check_output(ValidBboxDetectionsDataset) def fuse_detections( ensemble_detections_ds: xr.Dataset, @@ -79,10 +83,6 @@ def fuse_detections( reduce memory usage. """ - # Check if input dataset has 'model' dimension - if "model" not in ensemble_detections_ds.dims: - raise ValueError("Input dataset must have 'model' dimension. 
") - # Check if image_width_height defined in dataset image_shape = ensemble_detections_ds.attrs.get("image_shape") if image_shape is None: @@ -191,6 +191,7 @@ def _validate_image_shape(image_shape) -> np.ndarray: return image_shape +@_check_input(ValidBboxDetectionsEnsembleDataset) def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int: """Get upper bound for maximum number of boxes per image after fusion.""" detections_w_non_nan_position = ( diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 2d339ba2..7545c4b9 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -12,7 +12,7 @@ from torchvision.models import detection, get_model, list_models from ethology.detectors.ensembles.utils import pad_to_max_first_dimension -from ethology.validators.detections import ValidBboxDetectionsDataset +from ethology.validators.detections import ValidBboxDetectionsEnsembleDataset from ethology.validators.utils import _check_output @@ -125,7 +125,7 @@ def predict_step(self, batch, batch_idx): return raw_prediction_dicts_per_sample - @_check_output(ValidBboxDetectionsDataset) + @_check_output(ValidBboxDetectionsEnsembleDataset) def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: """Format as ethology detections dataset with model axis.""" # Get results from trainer diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py index 1f6d9df6..a22dab62 100644 --- a/ethology/validators/detections.py +++ b/ethology/validators/detections.py @@ -62,3 +62,63 @@ class ValidBboxDetectionsDataset(ValidDataset): }, init=False, ) + + +@define +class ValidBboxDetectionsEnsembleDataset(ValidDataset): + """Class for valid ``ethology`` bounding box ensemble detections datasets.
+ + This class validates that the input dataset: + + - is an xarray Dataset, + - has ``image_id``, ``space``, ``id`` and ``model`` as dimensions, + - has ``position``, ``shape`` and ``confidence`` as data variables, + - ``position`` and ``shape`` span at least the dimensions ``image_id``, + ``space``, ``id`` and ``model``, + - ``confidence`` spans at least the dimensions ``image_id``, ``id`` + and ``model``. + + + Attributes + ---------- + dataset : xarray.Dataset + The xarray dataset to validate. + required_dims : set + The set of required dimension names: ``image_id``, ``space``, ``id`` + and ``model``. + required_data_vars : dict[str, set] + A dictionary mapping data variable names to their required minimum + dimensions: + + - ``position`` maps to ``image_id``, ``space``, ``id`` and ``model``, + - ``shape`` maps to ``image_id``, ``space``, ``id`` and ``model``, + - ``confidence`` maps to ``image_id``, ``id`` and ``model``. + + Raises + ------ + TypeError + If the input is not an xarray Dataset. + ValueError + If the dataset is missing required data variables or dimensions, + or if any required dimensions are missing for any data variable. + + Notes + ----- + The dataset can have other data variables and dimensions, but only the + required ones are checked. 
+ + """ + + # Minimum requirements for a bbox dataset holding detections + required_dims: set = field( + default={"image_id", "space", "id", "model"}, + init=False, + ) + required_data_vars: dict = field( + default={ + "position": {"image_id", "space", "id", "model"}, + "shape": {"image_id", "space", "id", "model"}, + "confidence": {"image_id", "id", "model"}, + }, + init=False, + ) From 4440c12b3f813c8d0f01aa5d1bc68a577f6bd622 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:55:52 +0000 Subject: [PATCH 21/39] Fix output validator --- ethology/detectors/ensembles/fusion.py | 25 +++++++++++++------------ ethology/detectors/ensembles/models.py | 1 - examples/ensemble_of_detectors.py | 8 ++++++-- 3 files changed, 19 insertions(+), 15 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index d04c5d48..3af74e52 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -144,14 +144,11 @@ def fuse_detections( ) # Postprocess data arrays - fused_data_arrays = { - "position": centroid_fused_da, - "shape": shape_fused_da, - "confidence": confidence_fused_da, - "label": label_fused_da, - } fused_data_arrays = _postprocess_multi_image_fused_arrays( - **fused_data_arrays + position = centroid_fused_da, + shape = shape_fused_da, + confidence = confidence_fused_da, + label = label_fused_da, ) # Return a dataset @@ -408,7 +405,7 @@ def _postprocess_multi_image_fused_arrays( """Postprocess fused data arrays on multiple images after fusion.""" data_arrays = [position, shape, confidence, label] - # Remove padding across annotations + # Remove extra padding across annotations position_da, shape_da, confidence_da, label_da = [ da.dropna(dim="id", how="all") for da in data_arrays ] @@ -416,9 +413,13 @@ def _postprocess_multi_image_fused_arrays( # Pad labels with -1 rather than nan label_da = label_da.fillna(-1).astype(int) + # Assign id 
coordinates to data arrays + # (these are lost after apply_ufunc because exclude_dims is used) + n_max_detections = position_da.sizes["id"] + id_coords = np.arange(n_max_detections) return { - "position": position_da, - "shape": shape_da, - "confidence": confidence_da, - "label": label_da, + "position": position_da.assign_coords(id=id_coords), + "shape": shape_da.assign_coords(id=id_coords), + "confidence": confidence_da.assign_coords(id=id_coords), + "label": label_da.assign_coords(id=id_coords), } diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 7545c4b9..03d20211 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -107,7 +107,6 @@ def _get_model_state_dict(checkpoint): def predict_step(self, batch, batch_idx): """Predict step for a single batch.""" - # ------------------------------ # Run all models in ensemble in GPU images_batch, _annotations_batch = batch raw_prediction_dicts_per_model = [ diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index 11e9e862..e815b26b 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -210,7 +210,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # Some nice plots: # ensemble_detections_ds.confidence.sel(image_id=0).plot() # ensemble_detections_ds.confidence.sel(model=0).plot() -for m in range(5): +for m in range(ensemble_detections_ds.model.size): plt.figure() ensemble_detections_ds.confidence.sel(model=m).plot() @@ -260,6 +260,10 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # ---- method kwargs ---- ) +# %% +from ethology.validators.detections import ValidBboxDetectionsDataset +ValidBboxDetectionsDataset(fused_detections_ds) + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Fuse detections across models with NMS @@ -277,7 +281,7 @@ def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: # fused_detections_ds = fused_detections_nms_ds # 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% # Remove low confidence detections -confidence_threshold_post_fusion = 0.5 +confidence_threshold_post_fusion = 0.4 fused_detections_ds_ = fused_detections_ds.where( fused_detections_ds.confidence >= confidence_threshold_post_fusion ) From b38080dbba6fa25b7d29bd4a83691a18f76526ac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:56:33 +0000 Subject: [PATCH 22/39] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ethology/detectors/ensembles/fusion.py | 10 +++++----- examples/ensemble_of_detectors.py | 1 + 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 3af74e52..1bd08ed1 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -145,10 +145,10 @@ def fuse_detections( # Postprocess data arrays fused_data_arrays = _postprocess_multi_image_fused_arrays( - position = centroid_fused_da, - shape = shape_fused_da, - confidence = confidence_fused_da, - label = label_fused_da, + position=centroid_fused_da, + shape=shape_fused_da, + confidence=confidence_fused_da, + label=label_fused_da, ) # Return a dataset @@ -413,7 +413,7 @@ def _postprocess_multi_image_fused_arrays( # Pad labels with -1 rather than nan label_da = label_da.fillna(-1).astype(int) - # Assign id coordinates to data arrays + # Assign id coordinates to data arrays # (these are lost after apply_ufunc because exclude_dims is used) n_max_detections = position_da.sizes["id"] id_coords = np.arange(n_max_detections) diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py index e815b26b..d8ebb51c 100644 --- a/examples/ensemble_of_detectors.py +++ b/examples/ensemble_of_detectors.py @@ -262,6 +262,7 @@ def collate_fn_varying_n_bboxes(batch: 
tuple) -> tuple: # %% from ethology.validators.detections import ValidBboxDetectionsDataset + ValidBboxDetectionsDataset(fused_detections_ds) # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From b88a412fbf7b2c814480aad2703ba5b3021c7da0 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:56:34 +0000 Subject: [PATCH 23/39] Add ensembles yaml --- examples/ensemble_of_detectors.yaml | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 examples/ensemble_of_detectors.yaml diff --git a/examples/ensemble_of_detectors.yaml b/examples/ensemble_of_detectors.yaml new file mode 100644 index 00000000..80de260b --- /dev/null +++ b/examples/ensemble_of_detectors.yaml @@ -0,0 +1,19 @@ +models: + model_class: fasterrcnn_resnet50_fpn_v2 + model_kwargs: + num_classes: 2 + weights: null + weights_backbone: null + checkpoints: + - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt + - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt + - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/75583ec227e3444ab692b99c64795325/checkpoints/last.ckpt + - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/4acc37206b1e4f679d535c837bee2c2f/checkpoints/last.ckpt + - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/fdcf88fcbcc84fbeb94b45ca6b6f8914/checkpoints/last.ckpt + - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/daa05ded0ea047388c9134bf044061c5/checkpoints/last.ckpt +fusion: + method: weighted_boxes_fusion + method_kwargs: + iou_thr: 0.5 + skip_box_thr: 0.0001 + max_n_detections: 300 From 1b9476a0d2c1353c269963ff23f0b8844c671981 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 09:39:59 +0000 Subject: [PATCH 24/39] Stop tracking evaluation --- ethology/detectors/evaluate.py | 245 ------------------- 
examples/ensemble_of_detectors.py | 353 ---------------------------- examples/ensemble_of_detectors.yaml | 19 -- 3 files changed, 617 deletions(-) delete mode 100644 ethology/detectors/evaluate.py delete mode 100644 examples/ensemble_of_detectors.py delete mode 100644 examples/ensemble_of_detectors.yaml diff --git a/ethology/detectors/evaluate.py b/ethology/detectors/evaluate.py deleted file mode 100644 index 1ae34104..00000000 --- a/ethology/detectors/evaluate.py +++ /dev/null @@ -1,245 +0,0 @@ -"""Utilities for evaluating detectors.""" - -import numpy as np -import torch -import torchvision.ops as ops -import xarray as xr -from scipy.optimize import linear_sum_assignment - - -def evaluate_detections_hungarian_ds( - pred_bboxes_ds: xr.Dataset, - gt_bboxes_ds: xr.Dataset, - iou_threshold: float, -) -> tuple[xr.Dataset, xr.Dataset]: - """Compute true positives, false positives, and missed detections. - - Uses Hungarian algorithm for matching. - """ - # Add xy_min and xy_max if not present - if all( - [ - var_str not in pred_bboxes_ds.variables - for var_str in ["xy_min", "xy_max"] - ] - ): - pred_bboxes_ds = _add_bboxes_min_max_corners(pred_bboxes_ds) - - if all( - [ - var_str not in gt_bboxes_ds.variables - for var_str in ["xy_min", "xy_max"] - ] - ): - gt_bboxes_ds = _add_bboxes_min_max_corners(gt_bboxes_ds) - - # Prepare input for hungarian - pred_bboxes_x1y1_x2y2 = xr.concat( - [pred_bboxes_ds.xy_min, pred_bboxes_ds.xy_max], dim="space" - ).transpose("image_id", "id", "space") - - gt_bboxes_x1y1_x2y2 = xr.concat( - [gt_bboxes_ds.xy_min, gt_bboxes_ds.xy_max], dim="space" - ).transpose("image_id", "id", "space") - - # rename id dimension in gt_bboxes_x1y1_x2y2 - gt_bboxes_x1y1_x2y2 = gt_bboxes_x1y1_x2y2.rename({"id": "id_gt"}) - - # Run hungarian vectorized - tp_array, fp_array, md_array, iou_tp_array = xr.apply_ufunc( - _evaluate_detections_hungarian_arrays, - pred_bboxes_x1y1_x2y2, - gt_bboxes_x1y1_x2y2, - kwargs={"iou_threshold": iou_threshold}, - 
input_core_dims=[ - ["id", "space"], - ["id_gt", "space"], - ], - output_core_dims=[ - ["id"], - ["id"], - ["id_gt"], - ["id"], - ], - vectorize=True, - exclude_dims={"id", "id_gt"}, - ) - - # Add to datasets - pred_bboxes_ds["tp"] = xr.DataArray(tp_array, dims=["image_id", "id"]) - pred_bboxes_ds["fp"] = xr.DataArray(fp_array, dims=["image_id", "id"]) - pred_bboxes_ds["iou_tp"] = xr.DataArray( - iou_tp_array, dims=["image_id", "id"] - ) - - # rename id dimension in md_array - md_array = md_array.rename({"id_gt": "id"}) - gt_bboxes_ds["md"] = xr.DataArray(md_array, dims=["image_id", "id"]) - - return pred_bboxes_ds, gt_bboxes_ds - - -def _evaluate_detections_hungarian_arrays( - pred_bboxes: np.ndarray, gt_bboxes: np.ndarray, iou_threshold: float -) -> tuple[np.ndarray, ...]: - """Compute true positives, false positives, and missed detections. - - Uses Hungarian algorithm for matching and takes arrays of bboxes as input - in x1y1x2y2 format. - - Parameters - ---------- - pred_bboxes : np.ndarray - An array of prediction bounding boxes with the first four columns being - the coordinates of the bounding box in the format [x1, y1, x2, y2] - gt_bboxes : np.ndarray - An array of ground truth bounding boxes with the first four columns - being the coordinates of the bounding box in the format - [x1, y1, x2, y2] - iou_threshold : float - IoU threshold for considering a detection as true positive - - Returns - ------- - tuple - A tuple of four boolean arrays: - - true_positives: True for each predicted bbox that is a true positive - - false_positives: True for each predicted bbox that is a false - positive - - missed_detections: True for each ground truth bbox that is missed - - true_positives_iou: IoU of each true positive - - Notes - ----- - The output arrays are padded with False to the length of the original - arrays. This means that for example where the true_positives array is - False, that does not necessarily mean that the prediction is a false - positive. 
The same applies for the true_positives_iou array, which is - padded with nan. - - """ - # Remove nan values - n_pred_bboxes_padded = pred_bboxes.shape[0] - n_gt_bboxes_padded = gt_bboxes.shape[0] - pred_bboxes = pred_bboxes[~np.isnan(pred_bboxes).any(axis=1), :] - gt_bboxes = gt_bboxes[~np.isnan(gt_bboxes).any(axis=1), :] - - # Initialize output arrays - true_positives = np.zeros(len(pred_bboxes), dtype=bool) - false_positives = np.zeros(len(pred_bboxes), dtype=bool) - matched_gts = np.zeros(len(gt_bboxes), dtype=bool) - missed_detections = np.zeros(len(gt_bboxes), dtype=bool) # unmatched gts - - true_positives_iou = np.zeros(len(pred_bboxes), dtype=float) - - # cast as a tensor if not already - if not isinstance(pred_bboxes, torch.Tensor): - pred_bboxes = torch.from_numpy(pred_bboxes).float() - if not isinstance(gt_bboxes, torch.Tensor): - gt_bboxes = torch.from_numpy(gt_bboxes).float() - - if len(pred_bboxes) > 0 and len(gt_bboxes) > 0: - # Compute IoU matrix (pred_bboxes x gt_bboxes) - iou_matrix = ops.box_iou(pred_bboxes[:, :4], gt_bboxes).cpu().numpy() - # iou_matrix[np.isnan(iou_matrix)] = -np.inf - - # Use Hungarian algorithm to find optimal assignment - pred_indices, gt_indices = linear_sum_assignment( - iou_matrix, maximize=True - ) - - # Mark true positives and false positives based on optimal assignment - for pred_idx, gt_idx in zip(pred_indices, gt_indices, strict=True): - if iou_matrix[pred_idx, gt_idx] > iou_threshold: - true_positives[pred_idx] = True - matched_gts[gt_idx] = True - true_positives_iou[pred_idx] = iou_matrix[pred_idx, gt_idx] - else: - false_positives[pred_idx] = True - - # Mark unmatched predictions as false positives - false_positives[~true_positives] = True - - # Mark unmatched ground truth as missed detections - missed_detections[~matched_gts] = True - - elif len(pred_bboxes) == 0 and len(gt_bboxes) > 0: - # No predictions, all ground truth are missed - missed_detections[:] = True - elif len(pred_bboxes) > 0 and len(gt_bboxes) == 
0: - # No ground truth, all predictions are false positives - false_positives[:] = True - - # Pad tp, fp for pred_bboxes with False - tp_fp_pred_bboxes_padded: tuple[np.ndarray, ...] = () - for output in [true_positives, false_positives]: - output_padded = np.pad( - output, - (0, n_pred_bboxes_padded - len(output)), - mode="constant", - constant_values=False, - ) - tp_fp_pred_bboxes_padded += (output_padded,) - - # Pad true_positives_iou for pred_bboxes with nan - true_positives_iou_padded = np.pad( - true_positives_iou, - (0, n_pred_bboxes_padded - len(true_positives_iou)), - mode="constant", - constant_values=np.nan, - ) - - # Pad results for gt_bboxes with False - missed_detections_padded = np.pad( - missed_detections, - (0, n_gt_bboxes_padded - len(missed_detections)), - mode="constant", - constant_values=False, - ) - return tp_fp_pred_bboxes_padded + ( - missed_detections_padded, - true_positives_iou_padded, - ) - - -def compute_precision_recall_ds( - pred_bboxes_ds: xr.Dataset, - gt_bboxes_ds: xr.Dataset, - iou_threshold: float, -) -> tuple[xr.Dataset, xr.Dataset]: - """Compute precision and recall per image.""" - # Compute true positives, false positives, and missed detections - pred_bboxes_ds, gt_bboxes_ds = evaluate_detections_hungarian_ds( - pred_bboxes_ds=pred_bboxes_ds, - gt_bboxes_ds=gt_bboxes_ds, - iou_threshold=iou_threshold, - ) - - # Compute precision and recall per image - precision_per_img = pred_bboxes_ds.tp.sum(dim="id") / ( - pred_bboxes_ds.tp.sum(dim="id") + pred_bboxes_ds.fp.sum(dim="id") - ) - recall_per_img = pred_bboxes_ds.tp.sum(dim="id") / ( - pred_bboxes_ds.tp.sum(dim="id") + gt_bboxes_ds.md.sum(dim="id") - ) - - # Add to datasets - pred_bboxes_ds["precision"] = precision_per_img - pred_bboxes_ds["recall"] = recall_per_img - - return pred_bboxes_ds, gt_bboxes_ds - - -def _add_bboxes_min_max_corners(ds): - """Add xy_min and xy_max arrays to ds. - - # Compare to torchvision.ops.box_convert in testing? 
- box_convert( - torch.from_numpy(np.c_[ds.position.T, ds.shape.T]), - in_fmt="cxcywh", - out_fmt="xyxy", - ) - """ - ds["xy_min"] = ds.position - 0.5 * ds.shape - ds["xy_max"] = ds.position + 0.5 * ds.shape - return ds diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py deleted file mode 100644 index d8ebb51c..00000000 --- a/examples/ensemble_of_detectors.py +++ /dev/null @@ -1,353 +0,0 @@ -"""Evaluating ensemble of trained detectors.""" -# %% -# imports - -from pathlib import Path - -import numpy as np -import torch -import torchvision.transforms.v2 as transforms -import xarray as xr -import yaml -from lightning import Trainer -from matplotlib import pyplot as plt -from torch.utils.data import DataLoader -from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 - -from ethology.detectors.ensembles.fusion import fuse_detections -from ethology.detectors.ensembles.models import EnsembleDetector -from ethology.detectors.evaluate import compute_precision_recall_ds -from ethology.io.annotations import load_bboxes - -# %% -# %matplotlib widget -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -# Helper functions -def create_coco_dataset( - images_dir: str | Path, - annotations_file: str | Path, - composed_transform: transforms.Compose, -) -> CocoDetection: - """Create a COCO dataset for object detection. - - Note: transforms are applied to the full dataset. If the dataset - is later split, all splits will have the same transforms. - """ - dataset_coco = CocoDetection( - root=images_dir, - annFile=annotations_file, - transforms=composed_transform, - ) - - # wrap dataset for transforms v2 - dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco) - - return dataset_transformed - - -def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: - """Collate function for dataloader with varying number of bounding boxes. 
- - A custom function is needed for detection - because the number of bounding boxes varies - between images of the same batch. - See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop - - Parameters - ---------- - batch : tuple - a tuple of 2 tuples, the first one holding all images in the batch, - and the second one holding the corresponding annotations. - - Returns - ------- - tuple - a tuple of length = batch size, made up of (image, annotations) - tuples. - - """ - return tuple(zip(*batch, strict=True)) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Input data - -dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full") -images_dir = dataset_dir / "frames" -annotations_dir = dataset_dir / "annotations" -annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json" - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Define a dataloader -# Define transforms for inference -inference_transforms = transforms.Compose( - [ - transforms.ToImage(), - transforms.ToDtype(torch.float32, scale=True), - ] -) - -# Create COCO dataset -# TODO: convert from ethology detections dataset to COCO dataset -# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") -dataset_coco = create_coco_dataset( - images_dir=Path(dataset_dir) / "frames", - annotations_file=annotations_file_path, - composed_transform=inference_transforms, -) - -# dataloader -dataloader = DataLoader( - dataset_coco, - batch_size=12, - shuffle=False, - num_workers=4, - collate_fn=collate_fn_varying_n_bboxes, - persistent_workers=True, - # multiprocessing_context="fork" - # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available() - # else None, # see https://github.com/pytorch/pytorch/issues/87688 -) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Define a YAML config file for 
the ensemble of trained detectors -experiment_ID = "617393114420881798" -ml_runs_experiment_dir = ( - Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID -) -last_ckpt = Path("checkpoints") / "last.ckpt" - -config = { - "models": { - "model_class": "fasterrcnn_resnet50_fpn_v2", - # imported from torchvision.models.detection - "model_kwargs": { - "num_classes": 2, - "weights": None, # null in YAML becomes None in Python - "weights_backbone": None, - }, - "checkpoints": [ - str( - ml_runs_experiment_dir - / "f348d9d196934073bece1b877cbc4d38" - / last_ckpt - ), # above_0th - str( - ml_runs_experiment_dir - / "879d2f77e2b24adcb06b87d2fede6a04" - / last_ckpt - ), # above_1st - str( - ml_runs_experiment_dir - / "75583ec227e3444ab692b99c64795325" - / last_ckpt - ), # above_5th - str( - ml_runs_experiment_dir - / "4acc37206b1e4f679d535c837bee2c2f" - / last_ckpt - ), # above_10th - str( - ml_runs_experiment_dir - / "fdcf88fcbcc84fbeb94b45ca6b6f8914" - / last_ckpt - ), # above_25th - str( - ml_runs_experiment_dir - / "daa05ded0ea047388c9134bf044061c5" - / last_ckpt - ), # above_50th - ], - }, - "fusion": { - "method": "weighted_boxes_fusion", - # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted" - "method_kwargs": { - # arguments as in ensemble_boxes.weighted_boxes_fusion - "iou_thr": 0.5, # iou threshold for the ensemble - "skip_box_thr": 0.0001, - }, - # "n_jobs": -1, # workers for joblib.Parallel, - # n_workers should be <= number of CPU cores - # "confidence_threshold_post_fusion": 0.0, - "max_n_detections": 300, - }, -} -config_file = "ensemble_of_detectors.yaml" -with open(config_file, "w") as f: - yaml.dump(config, f, sort_keys=False) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Load the ensemble of detectors -ensemble_detector = EnsembleDetector(config_file) -print(f"Ensemble detector is on device: {ensemble_detector.device}") - -# 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Run the ensemble of detectors on a dataset -# Use Trainer for inference (this sets the device flexibly) -trainer = Trainer(accelerator="gpu", devices=1, logger=False) -_ = trainer.predict(ensemble_detector, dataloader) - - -# Format predictions as ethology detections dataset and add attrs -# TODO: think about syntax of format_predictions (should it be instance or -# static method instead?) -# Q: Can it just be output from .predict? -# TODO: dataloader to ethology detections dataset -gt_bboxes_ds = load_bboxes.from_files( - annotations_file_path, format="COCO", images_dirs=images_dir -) -ensemble_detections_ds = ensemble_detector.format_predictions( - attrs=gt_bboxes_ds.attrs -) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Some nice plots: -# ensemble_detections_ds.confidence.sel(image_id=0).plot() -# ensemble_detections_ds.confidence.sel(model=0).plot() -for m in range(ensemble_detections_ds.model.size): - plt.figure() - ensemble_detections_ds.confidence.sel(model=m).plot() - - -# %%%%%%%% -# All models predict less boxes and have less avg confidence per image in -# image_ids from 350 to 450. Let's inspect video names and images for these -# samples. - -# Add video name array -video_name = [ - ensemble_detections_ds.map_image_id_to_filename[img_id].split("_frame")[0] - for img_id in ensemble_detections_ds.image_id.values -] -ensemble_detections_ds["video"] = xr.DataArray(video_name, dims="image_id") - -# which videos? 
-np.unique(ensemble_detections_ds.video.sel(image_id=range(350, 450)).values) - -# %%%%%% -# Visualise image -for image_id in range(350, 450, 10): - image_filename = ensemble_detections_ds.map_image_id_to_filename[image_id] - image_path = ensemble_detections_ds.images_directories / image_filename - - # img = Image.open(image_path) - img = plt.imread(image_path) - - plt.figure() - plt.imshow(img) - plt.title(f"{image_filename}") - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Fuse detections across models with WBF -# TODO: think whether joblib approach is more readable? -image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] -ensemble_detections_ds.attrs["image_shape"] = image_width_height -config_fusion: dict = config["fusion"] - - -fused_detections_ds = fuse_detections( - ensemble_detections_ds, - fusion_method=config_fusion["method"], - fusion_method_kwargs=config_fusion["method_kwargs"], - # max_n_detections=config_fusion["max_n_detections"], - # should be larger than expected maximum number of detections after fusion - # ---- method kwargs ---- -) - -# %% -from ethology.validators.detections import ValidBboxDetectionsDataset - -ValidBboxDetectionsDataset(fused_detections_ds) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Fuse detections across models with NMS - -# fused_detections_nms_ds = fuse_ensemble_detections( -# ensemble_detections_ds, -# fusion_method="soft_nms", -# fusion_method_kwargs={ -# "iou_thr": config_fusion["method_kwargs"]["iou_thr"], -# "sigma": 0.5, -# "thresh": 0.001, -# }, -# max_n_detections=500, -# ) - -# fused_detections_ds = fused_detections_nms_ds -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Remove low confidence detections -confidence_threshold_post_fusion = 0.4 -fused_detections_ds_ = fused_detections_ds.where( - fused_detections_ds.confidence >= confidence_threshold_post_fusion -) - -# 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Evaluate the ensemble model -# - load ground truth -# - compute metrics - -# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") - -iou_threshold_tp = 0.25 -fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds( - pred_bboxes_ds=fused_detections_ds_, - gt_bboxes_ds=gt_bboxes_ds, - iou_threshold=iou_threshold_tp, -) - -# All models on full August dataset, without removing low -# confidence detections: -# confidence_threshold_post_fusion = 0.0 -# Precision: 0.5920 -# Recall: 0.8455 -# --- -# confidence_threshold_post_fusion = 0.4 -# Precision: 0.8339 -# Recall: 0.7177 -# --- -# confidence_threshold_post_fusion = 0.5 -# Precision: 0.8714 -# Recall: 0.6624 -# --- - -print( - "Ensemble model with confidence threshold post fusion: " - f"{confidence_threshold_post_fusion:.2f}" -) -print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}") -print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}") - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Plot calibration curve - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Evaluate single models -list_detections_ds_eval = [] -for k in range(ensemble_detections_ds.sizes["model"]): - # filter low confidence detections (for a fairer comparison) - detections_one_model = ensemble_detections_ds.where( - ensemble_detections_ds.confidence >= confidence_threshold_post_fusion - ).sel(model=k) - - # evaluate - detections_ds, _ = compute_precision_recall_ds( - pred_bboxes_ds=detections_one_model, - gt_bboxes_ds=gt_bboxes_ds, - iou_threshold=iou_threshold_tp, - ) - list_detections_ds_eval.append(detections_ds) - - print(f"Model: {k}") - print(f"Precision: {detections_ds.precision.mean().values:.4f}") - print(f"Recall: {detections_ds.recall.mean().values:.4f}") - print("--------------------------------") - -# 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Visualise detections diff --git a/examples/ensemble_of_detectors.yaml b/examples/ensemble_of_detectors.yaml deleted file mode 100644 index 80de260b..00000000 --- a/examples/ensemble_of_detectors.yaml +++ /dev/null @@ -1,19 +0,0 @@ -models: - model_class: fasterrcnn_resnet50_fpn_v2 - model_kwargs: - num_classes: 2 - weights: null - weights_backbone: null - checkpoints: - - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/f348d9d196934073bece1b877cbc4d38/checkpoints/last.ckpt - - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/879d2f77e2b24adcb06b87d2fede6a04/checkpoints/last.ckpt - - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/75583ec227e3444ab692b99c64795325/checkpoints/last.ckpt - - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/4acc37206b1e4f679d535c837bee2c2f/checkpoints/last.ckpt - - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/fdcf88fcbcc84fbeb94b45ca6b6f8914/checkpoints/last.ckpt - - /home/sminano/swc/project_crabs/ml-runs/617393114420881798/daa05ded0ea047388c9134bf044061c5/checkpoints/last.ckpt -fusion: - method: weighted_boxes_fusion - method_kwargs: - iou_thr: 0.5 - skip_box_thr: 0.0001 - max_n_detections: 300 From 397c1a7a492b9b8c948301e3dfba0fe1225b5a82 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 11:36:58 +0000 Subject: [PATCH 25/39] Accelerate fusion using joblib --- ethology/detectors/ensembles/fusion.py | 195 ++++++++++++++----------- 1 file changed, 108 insertions(+), 87 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 1bd08ed1..95bf99cd 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -2,11 +2,14 @@ from collections.abc import Callable from functools import partial -from typing import Literal, TypedDict, Unpack +from typing import Literal, 
TypeAlias, TypedDict, Unpack import ensemble_boxes import numpy as np +import pandas as pd import xarray as xr +from joblib import Parallel, delayed +from tqdm import tqdm from ethology.validators.detections import ( ValidBboxDetectionsDataset, @@ -14,6 +17,8 @@ ) from ethology.validators.utils import _check_input, _check_output +# ------------------- Supported fusion methods ------------------ +# from ensemble_boxes VALID_FUSION_METHODS = { "weighted_boxes_fusion": ensemble_boxes.weighted_boxes_fusion, "nms": ensemble_boxes.nms, @@ -21,13 +26,25 @@ "non_maxium_weighted": ensemble_boxes.non_maximum_weighted, } -fusion_method_type = Literal[ - "weighted_boxes_fusion", "nms", "soft_nms", "non_maxium_weighted" + +# ------------------ Custom types ---------------------- +TypeFusionMethod = Literal[ + "weighted_boxes_fusion", + "nms", + "soft_nms", + "non_maxium_weighted", +] + +TupleFourDataArrays: TypeAlias = tuple[ + xr.DataArray, + xr.DataArray, + xr.DataArray, + xr.DataArray, ] -class _TypeFusionKwargs(TypedDict, total=False): - """Type hints for fusion method kwargs. +class _TypeFusionMethodKwargs(TypedDict, total=False): + """Type hints for fusion method keyword arguments. Parameters for methods as described in the ensemble_boxes documentation. See https://github.com/ZFTurbo/Weighted-Boxes-Fusion @@ -38,21 +55,20 @@ class _TypeFusionKwargs(TypedDict, total=False): weights: list[float] Weights for each model. iou_thr: float - IoU threshold for detections to be considered a true positive IoU threshold for detections to be considered a true positive during fusion. skip_box_thr: float - Exclude from fusion boxes with confidence below this value. + Exclude boxes with confidence below this value from fusion. sigma: float - Sigma for soft NMS. + Sigma for soft non-maximum supression. thresh: float - Threshold for boxes to keep after soft NMS. + Threshold for boxes to keep after soft non-maximum supression. 
conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"] Method to compute the confidence score of the fused detections. - "avg": Average confidence score of the fused detections (default). - - 'box_and_model_avg': box and model wise hybrid weighted average. - - 'absent_model_aware_avg': weighted average that takes into account + - "box_and_model_avg": box and model wise hybrid weighted average. + - "absent_model_aware_avg": weighted average that takes into account the absent model. allows_overflow: bool Whether to allow the confidence score of the fused detections to @@ -69,13 +85,17 @@ class _TypeFusionKwargs(TypedDict, total=False): allows_overflow: bool +# ---------------------------------- + + @_check_input(ValidBboxDetectionsEnsembleDataset) @_check_output(ValidBboxDetectionsDataset) def fuse_detections( ensemble_detections_ds: xr.Dataset, - fusion_method: fusion_method_type, + fusion_method: TypeFusionMethod, fusion_method_kwargs: dict | None = None, max_n_detections: int | None = None, + n_workers: int | None = -1, # number of workers for joblib.Parallel ) -> xr.Dataset: """Fuse ensemble detections across models using WBF. 
@@ -109,46 +129,26 @@ def fuse_detections( _fuse_single_image_detections, fusion_function ) - # Run fusion across image_id using apply_ufunc - centroid_fused_da, shape_fused_da, confidence_fused_da, label_fused_da = ( - xr.apply_ufunc( - _fuse_single_image_detections_partial, - ensemble_detections_ds.position, # .data array is passed - ensemble_detections_ds.shape, - ensemble_detections_ds.confidence, - ensemble_detections_ds.label, - kwargs={ - "image_width_height": image_width_height, - "max_n_detections": max_n_detections, - **(fusion_method_kwargs if fusion_method_kwargs else {}), - }, - input_core_dims=[ # do not broadcast across these - ["space", "id", "model"], # centroid - ["space", "id", "model"], # shape - ["id", "model"], # confidence - ["id", "model"], # label - ], - output_core_dims=[ # do not broadcast across these - ["space", "id"], # centroid - ["space", "id"], # shape - ["id"], # confidence - ["id"], # label - ], - vectorize=True, - # TODO: can I avoid vectorize? - # loop over non-core dims (i.e. 
image_id); - # assumes function only takes arrays over core dims as input - exclude_dims={"id"}, - # to allow dimensions that change size between input and output + # Run fusion across image_id + # if n_workers is None: + # n_workers = -1 + + results_per_img_id = Parallel(n_jobs=n_workers)( + delayed(_fuse_single_image_detections_partial)( + ensemble_detections_ds.position.sel(image_id=img_id).values, + ensemble_detections_ds.shape.sel(image_id=img_id).values, + ensemble_detections_ds.confidence.sel(image_id=img_id).values, + ensemble_detections_ds.label.sel(image_id=img_id).values, + image_width_height, + max_n_detections, + **fusion_method_kwargs, ) + for img_id in tqdm(ensemble_detections_ds.image_id) ) # Postprocess data arrays fused_data_arrays = _postprocess_multi_image_fused_arrays( - position=centroid_fused_da, - shape=shape_fused_da, - confidence=confidence_fused_da, - label=label_fused_da, + results_per_img_id, ensemble_detections_ds.image_id ) # Return a dataset @@ -209,15 +209,16 @@ def _preprocess_single_image_detections( label: xr.DataArray, image_width_height: np.ndarray, ) -> tuple[list[np.ndarray], list[np.ndarray], list[np.ndarray]]: - """Prepare ensemble detections on a single image for fusion.""" - # Prepare boxes array --> position, shape arrays to x1y1x2y normalised + """Prepare detections of an ensemble on a single image for fusion.""" + # Prepare boxes array + # transform position and shape arrays to x1y1x2y normalised bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None] bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None] bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2]) - # 4, n_annot, n_models + # shape: 4, max_n_annotations_per_frame, n_models # Get list of bboxes per model - # arrays need to be tall for WBF + # arrays need to be tall for fusion methods n_models = bboxes_x1y1_x2y2_normalised.shape[-1] list_bboxes_per_model = [ arr.squeeze() @@ -308,7 +309,7 @@ def 
_postprocess_single_image_detections( # Format output as xarray dataarrays centroid_da, shape_da, confidence_da, label_da = ( - _single_image_detections_as_dataarrays( + _parse_single_image_detections_as_dataarrays( ensemble_data[:, 0:4], ensemble_data[:, 4], ensemble_data[:, 5], @@ -320,15 +321,15 @@ def _postprocess_single_image_detections( def _fuse_single_image_detections( fusion_function: Callable, - position, - shape, + position: np.ndarray, + shape: np.ndarray, confidence: np.ndarray, label: np.ndarray, image_width_height: np.ndarray, max_n_detections: int, - **fusion_kwargs: Unpack[_TypeFusionKwargs], # method-only kwargs -) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: - """Fuse detections across models for a single image using WBF.""" + **fusion_kwargs: Unpack[_TypeFusionMethodKwargs], # method-only kwargs +) -> TupleFourDataArrays: + """Fuse detections across models for a single image using selected method.""" # Prepare single image arrays for fusion list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( _preprocess_single_image_detections( @@ -336,8 +337,7 @@ def _fuse_single_image_detections( ) ) - # ------------------------------------ - # Run WBF on one image + # Run fusion method on one image ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( fusion_function( list_bboxes_per_model, @@ -347,8 +347,6 @@ def _fuse_single_image_detections( ) ) - # ------------------------------------ - # Format output as xarray dataarrays centroid_da, shape_da, confidence_da, label_da = ( _postprocess_single_image_detections( @@ -363,13 +361,13 @@ def _fuse_single_image_detections( return centroid_da, shape_da, confidence_da, label_da -def _single_image_detections_as_dataarrays( +def _parse_single_image_detections_as_dataarrays( x1y1_x2y2_array: np.ndarray, scores_array: np.ndarray, labels_array: np.ndarray, id_array: np.ndarray | None = None, -) -> tuple[xr.DataArray, xr.DataArray, xr.DataArray, xr.DataArray]: - 
"""Format single image fused detections as data arrays.""" +) -> TupleFourDataArrays: + """Format array of single image fused results as data arrays.""" if id_array is None: n_detections = x1y1_x2y2_array.shape[0] id_array = np.arange(n_detections) @@ -396,30 +394,53 @@ def _single_image_detections_as_dataarrays( ) +@_check_output(ValidBboxDetectionsDataset) def _postprocess_multi_image_fused_arrays( - position: xr.DataArray, - shape: xr.DataArray, - confidence: xr.DataArray, - label: xr.DataArray, -) -> dict: - """Postprocess fused data arrays on multiple images after fusion.""" - data_arrays = [position, shape, confidence, label] + results_per_img_id: list[TupleFourDataArrays], + list_img_id: list, +) -> xr.Dataset: + """Postprocess fused data arrays on multiple images after fusion. - # Remove extra padding across annotations - position_da, shape_da, confidence_da, label_da = [ - da.dropna(dim="id", how="all") for da in data_arrays - ] + Fix padding and assign id coordinates. + """ + # Parse results from joblib + # (output from joblib is a list of n = n_images, each element + # containing a tuple of data arrays) + list_da_dict = {} + ( + list_da_dict["position"], + list_da_dict["shape"], + list_da_dict["confidence"], + list_da_dict["label"], + ) = list(zip(*results_per_img_id)) + + # Concatenate lists of dataarrays along image_id dimension + fused_da_dict = {} + for da_str, list_da in list_da_dict.items(): + fused_da_dict[da_str] = xr.concat( + list_da, + pd.Index(list_img_id, name="image_id"), + ) + + # Remove extra padding in id dimension + fixed_padding_da_dict = {} + for da_str, da in fused_da_dict.items(): + fixed_padding_da_dict[da_str] = da.dropna(dim="id", how="all") # Pad labels with -1 rather than nan - label_da = label_da.fillna(-1).astype(int) - - # Assign id coordinates to data arrays - # (these are lost after apply_ufunc because exclude_dims is used) - n_max_detections = position_da.sizes["id"] - id_coords = np.arange(n_max_detections) - return { - 
"position": position_da.assign_coords(id=id_coords), - "shape": shape_da.assign_coords(id=id_coords), - "confidence": confidence_da.assign_coords(id=id_coords), - "label": label_da.assign_coords(id=id_coords), - } + fixed_padding_da_dict["label"] = ( + fixed_padding_da_dict["label"].fillna(-1).astype(int) + ) + + # Format as dataset + return xr.Dataset(data_vars=fixed_padding_da_dict) + # # Assign id coordinates to data arrays + # # (these are lost after apply_ufunc because exclude_dims is used) + # n_max_detections = fixed_padding_da_dict["position"].sizes["id"] + # id_coords = np.arange(n_max_detections) + + # fixed_id_coord_da_dict = {} + # for da_str, da in fixed_padding_da_dict.items(): + # fixed_id_coord_da_dict[da_str] = da.assign_coords(id=id_coords) + + # return fixed_padding_da_dict From a515f1f7c6b8f2ea66271a870eb2441d9e6a9ff5 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 13:05:41 +0000 Subject: [PATCH 26/39] Simplify fusion module --- ethology/detectors/ensembles/fusion.py | 339 ++++++++++--------------- 1 file changed, 138 insertions(+), 201 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 95bf99cd..948ae776 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -48,32 +48,6 @@ class _TypeFusionMethodKwargs(TypedDict, total=False): Parameters for methods as described in the ensemble_boxes documentation. See https://github.com/ZFTurbo/Weighted-Boxes-Fusion - - - Parameters - ---------- - weights: list[float] - Weights for each model. - iou_thr: float - IoU threshold for detections to be considered a true positive - during fusion. - skip_box_thr: float - Exclude boxes with confidence below this value from fusion. - sigma: float - Sigma for soft non-maximum supression. - thresh: float - Threshold for boxes to keep after soft non-maximum supression. 
- conf_type: Literal["avg", "box_and_model_avg", "absent_model_aware_avg"] - Method to compute the confidence score of the fused detections. - - - "avg": Average confidence score of the fused detections (default). - - "box_and_model_avg": box and model wise hybrid weighted average. - - "absent_model_aware_avg": weighted average that takes into account - the absent model. - allows_overflow: bool - Whether to allow the confidence score of the fused detections to - exceed 1. - """ weights: list[float] | None @@ -95,12 +69,12 @@ def fuse_detections( fusion_method: TypeFusionMethod, fusion_method_kwargs: dict | None = None, max_n_detections: int | None = None, - n_workers: int | None = -1, # number of workers for joblib.Parallel + n_workers: int | None = -1, ) -> xr.Dataset: - """Fuse ensemble detections across models using WBF. + """Fuse ensemble detections across models using the selected method. You can set a max_n_detections if upper bound is known a prior to - reduce memory usage. + reduce memory usage. n_workers: number of workers for joblib.Parallel """ # Check if image_width_height defined in dataset @@ -111,14 +85,13 @@ def fuse_detections( "attributes. Please ensure the dataset has 'image_shape' " "(width, height in pixels) in its attributes." ) - else: - image_width_height = _validate_image_shape(image_shape) + image_width_height = _validate_image_shape(image_shape) # Compute upper bound of max_n_detections if not max_n_detections: max_n_detections = _estimate_max_n_detections(ensemble_detections_ds) - # Build single-image partial fusion function for the selected method + # Build single-image partial function for the selected fusion method if fusion_method not in VALID_FUSION_METHODS: raise ValueError( f"Invalid fusion method: {fusion_method}. 
" @@ -129,10 +102,7 @@ def fuse_detections( _fuse_single_image_detections, fusion_function ) - # Run fusion across image_id - # if n_workers is None: - # n_workers = -1 - + # Parallelise fusion across image_id results_per_img_id = Parallel(n_jobs=n_workers)( delayed(_fuse_single_image_detections_partial)( ensemble_detections_ds.position.sel(image_id=img_id).values, @@ -147,28 +117,45 @@ def fuse_detections( ) # Postprocess data arrays - fused_data_arrays = _postprocess_multi_image_fused_arrays( + fused_detections_ds = _postprocess_multi_image_fused_arrays( results_per_img_id, ensemble_detections_ds.image_id ) - # Return a dataset - return xr.Dataset(data_vars=fused_data_arrays) + return fused_detections_ds -def _validate_image_shape(image_shape) -> np.ndarray: - """Validate and convert image shape to numpy array. +# ------- Multi image fusion ------------------ - Args: - image_shape: Image dimensions as (width, height). - Should be array-like with 2 elements. +@_check_output(ValidBboxDetectionsDataset) +def _postprocess_multi_image_fused_arrays( + results_per_img_id: list[TupleFourDataArrays], + list_img_id: list, +) -> xr.Dataset: + """Postprocess fused data arrays on multiple images after fusion. - Returns: - np.ndarray: Validated image shape as 1D array with 2 elements. + Fix padding and assign id coordinates. + """ + # Transpose results from list-of-tuples to tuple-of-lists + da_names = ("position", "shape", "confidence", "label") + da_lists = zip(*results_per_img_id) - Raises: - ValueError: If image_shape cannot be converted to a valid shape. 
+ # Concatenate lists of dataarrays along image_id dimension and + # remove extra padding in "id" dimension + fused_da_dict = {} + for da_str, list_da in zip(da_names, da_lists, strict=True): + fused_da_dict[da_str] = xr.concat( + list_da, pd.Index(list_img_id, name="image_id") + ).dropna(dim="id", how="all") - """ + # Pad labels with -1 rather than nan + fused_da_dict["label"] = fused_da_dict["label"].fillna(-1).astype(int) + + return xr.Dataset(data_vars=fused_da_dict) + + +def _validate_image_shape(image_shape) -> np.ndarray: + """Validate and cast image shape as numpy array.""" + # Try casting as numpy array try: image_shape = np.asarray(image_shape) except (TypeError, ValueError) as e: @@ -177,20 +164,22 @@ def _validate_image_shape(image_shape) -> np.ndarray: "Expected format: (width, height) as tuple or array-like." ) from e - # Flatten to handle (2,), (1,2) and (2,1) shapes - image_shape = image_shape.flatten() - if image_shape.shape != (2,): + # Check number of elements in array + if image_shape.size != 2: raise ValueError( f"'image_shape' must have exactly 2 elements (width, height), " f"got shape {image_shape.shape}" ) - return image_shape @_check_input(ValidBboxDetectionsEnsembleDataset) def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int: - """Get upper bound for maximum number of boxes per image after fusion.""" + """Get upper bound for maximum number of boxes per image after fusion. + + We assume no detections are fused and all images have as many detections as the maximum + number of non-nan detections per image. 
+ """ detections_w_non_nan_position = ( ensemble_detections_ds.position.notnull().all(dim="space") ) # True if non-nan x and y @@ -202,6 +191,51 @@ def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int: ) +# ------- Single image fusion ------------------ + + +def _fuse_single_image_detections( + fusion_function: Callable, + position: np.ndarray, + shape: np.ndarray, + confidence: np.ndarray, + label: np.ndarray, + image_width_height: np.ndarray, + max_n_detections: int, + **fusion_kwargs: Unpack[_TypeFusionMethodKwargs], # method-only kwargs +) -> TupleFourDataArrays: + """Fuse detections across models for a single image using selected method.""" + # Prepare single image arrays for fusion + list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( + _preprocess_single_image_detections( + position, shape, confidence, label, image_width_height + ) + ) + + # Run fusion method on one image + ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( + fusion_function( + list_bboxes_per_model, + list_confidence_per_model, + list_label_per_model, + **fusion_kwargs, + ) + ) + + # Format output as xarray dataarrays + centroid_da, shape_da, confidence_da, label_da = ( + _postprocess_single_image_detections( + ensemble_x1y1_x2y2_norm, + ensemble_scores, + ensemble_labels, + image_width_height, + max_n_detections, + ) + ) + + return centroid_da, shape_da, confidence_da, label_da + + def _preprocess_single_image_detections( position: xr.DataArray, shape: xr.DataArray, @@ -214,9 +248,14 @@ def _preprocess_single_image_detections( # transform position and shape arrays to x1y1x2y normalised bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None] bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None] - bboxes_x1y1_x2y2_normalised = np.concat([bboxes_x1y1, bboxes_x2y2]) - # shape: 4, max_n_annotations_per_frame, n_models + bboxes_x1y1_x2y2_normalised = np.transpose( + np.concat( + [bboxes_x1y1, bboxes_x2y2] 
+ ), # shape: 4, max_n_annotations_per_frame, n_models + (1, 0, 2), # shape: max_n_annotations_per_frame, 4, n_models + ) + # -------------------- # Get list of bboxes per model # arrays need to be tall for fusion methods n_models = bboxes_x1y1_x2y2_normalised.shape[-1] @@ -230,36 +269,31 @@ def _preprocess_single_image_detections( list_label_per_model = [ arr.squeeze() for arr in np.split(label, n_models, axis=-1) ] + # -------------------- - # Remove rows with nan coordinates - list_bboxes_per_model = [ - arr[:, ~np.any(np.isnan(arr), axis=0)].T - for arr in list_bboxes_per_model - ] - list_confidence_per_model = [ - conf_arr[: bbox_arr.shape[0]] - for bbox_arr, conf_arr in zip( + # Remove rows with nan coordinates and return lists of arrays + list_non_nan_bboxes_per_model = [ + sum(~np.any(np.isnan(arr), axis=1)) for arr in list_bboxes_per_model + ] + return ( + _chop_end_of_array(list_arrays_per_model, list_non_nan_bboxes_per_model) + for list_arrays_per_model in [ list_bboxes_per_model, list_confidence_per_model, - strict=True, - ) - ] - list_label_per_model = [ - label_arr[: bbox_arr.shape[0]] - for bbox_arr, label_arr in zip( - list_bboxes_per_model, list_label_per_model, - strict=True, - ) - ] - - return ( - list_bboxes_per_model, - list_confidence_per_model, - list_label_per_model, + ] ) +def _chop_end_of_array( + list_arrays: list[np.ndarray], list_end_lengths: list[int] +) -> list[np.ndarray]: + """Chop end of arrays in list to the desired length along the first dimension.""" + return [ + arr[:n] for arr, n in zip(list_arrays, list_end_lengths, strict=True) + ] + + def _postprocess_single_image_detections( ensemble_x1y1_x2y2_norm, ensemble_scores, @@ -276,89 +310,44 @@ def _postprocess_single_image_detections( image_width_height, (1, 2) ) - # Combine x1y1, x2y2, scores and labels in one array - ensemble_data = np.c_[ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels] - - # Remove rows with nan coordinates - ensemble_data = ensemble_data[ - 
~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1) - ] - - # Check padding - if ensemble_data.shape[0] > max_n_detections: + # Get 1d array for non-nan boxes + bool_non_nan_array = ~np.any(np.isnan(ensemble_x1y1_x2y2), axis=1) + n_non_nan_boxes = bool_non_nan_array.sum() + if n_non_nan_boxes > max_n_detections: raise ValueError( "Insufficient padding provided. " "The estimated maximum number of detections per image was set to " f"{max_n_detections}, " - f"but {ensemble_data.shape[0]} detections were " + f"but {n_non_nan_boxes} detections were " "found in one of the images after fusion. Please increase the " "maximum number of detections per image." ) - # Pad combined array to max_n_detections - # (this is required to concatenate across image_ids) - ensemble_data = np.pad( - ensemble_data, - ( - (0, max_n_detections - ensemble_data.shape[0]), - (0, 0), + # Retain non-nan boxes only and pad each array + return _parse_single_image_detections_as_dataarrays( + *( + _remove_nan_and_pad_to_max( + arr, bool_non_nan_array, max_n_detections + ) + for arr in (ensemble_x1y1_x2y2, ensemble_scores, ensemble_labels) ), - "constant", - constant_values=np.nan, - ) - - # Format output as xarray dataarrays - centroid_da, shape_da, confidence_da, label_da = ( - _parse_single_image_detections_as_dataarrays( - ensemble_data[:, 0:4], - ensemble_data[:, 4], - ensemble_data[:, 5], - ) - ) - - return centroid_da, shape_da, confidence_da, label_da - - -def _fuse_single_image_detections( - fusion_function: Callable, - position: np.ndarray, - shape: np.ndarray, - confidence: np.ndarray, - label: np.ndarray, - image_width_height: np.ndarray, - max_n_detections: int, - **fusion_kwargs: Unpack[_TypeFusionMethodKwargs], # method-only kwargs -) -> TupleFourDataArrays: - """Fuse detections across models for a single image using selected method.""" - # Prepare single image arrays for fusion - list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( - _preprocess_single_image_detections( 
- position, shape, confidence, label, image_width_height - ) ) - # Run fusion method on one image - ensemble_x1y1_x2y2_norm, ensemble_scores, ensemble_labels = ( - fusion_function( - list_bboxes_per_model, - list_confidence_per_model, - list_label_per_model, - **fusion_kwargs, - ) - ) - # Format output as xarray dataarrays - centroid_da, shape_da, confidence_da, label_da = ( - _postprocess_single_image_detections( - ensemble_x1y1_x2y2_norm, - ensemble_scores, - ensemble_labels, - image_width_height, - max_n_detections, - ) +def _remove_nan_and_pad_to_max( + input_array, mask_non_nan_rows, max_n_detections, fill_value=np.nan +): + """Remove non-nan from input array and pad with nans, all along first dimension.""" + # Initialise array with nans + padded_array = np.full( + (max_n_detections, *input_array.shape[1:]), + fill_value, + dtype=input_array.dtype, ) - - return centroid_da, shape_da, confidence_da, label_da + # Replace top "mask_non_nan_rows.sum()" chunk with non-nan values from + # input array + padded_array[: mask_non_nan_rows.sum()] = input_array[mask_non_nan_rows] + return padded_array def _parse_single_image_detections_as_dataarrays( @@ -392,55 +381,3 @@ def _parse_single_image_detections_as_dataarrays( xr.DataArray(scores_array, dims=["id"], coords=id_coords), xr.DataArray(labels_array, dims=["id"], coords=id_coords), ) - - -@_check_output(ValidBboxDetectionsDataset) -def _postprocess_multi_image_fused_arrays( - results_per_img_id: list[TupleFourDataArrays], - list_img_id: list, -) -> xr.Dataset: - """Postprocess fused data arrays on multiple images after fusion. - - Fix padding and assign id coordinates. 
- """ - # Parse results from joblib - # (output from joblib is a list of n = n_images, each element - # containing a tuple of data arrays) - list_da_dict = {} - ( - list_da_dict["position"], - list_da_dict["shape"], - list_da_dict["confidence"], - list_da_dict["label"], - ) = list(zip(*results_per_img_id)) - - # Concatenate lists of dataarrays along image_id dimension - fused_da_dict = {} - for da_str, list_da in list_da_dict.items(): - fused_da_dict[da_str] = xr.concat( - list_da, - pd.Index(list_img_id, name="image_id"), - ) - - # Remove extra padding in id dimension - fixed_padding_da_dict = {} - for da_str, da in fused_da_dict.items(): - fixed_padding_da_dict[da_str] = da.dropna(dim="id", how="all") - - # Pad labels with -1 rather than nan - fixed_padding_da_dict["label"] = ( - fixed_padding_da_dict["label"].fillna(-1).astype(int) - ) - - # Format as dataset - return xr.Dataset(data_vars=fixed_padding_da_dict) - # # Assign id coordinates to data arrays - # # (these are lost after apply_ufunc because exclude_dims is used) - # n_max_detections = fixed_padding_da_dict["position"].sizes["id"] - # id_coords = np.arange(n_max_detections) - - # fixed_id_coord_da_dict = {} - # for da_str, da in fixed_padding_da_dict.items(): - # fixed_id_coord_da_dict[da_str] = da.assign_coords(id=id_coords) - - # return fixed_padding_da_dict From fb779ddaac4ad6a65909c5b85864be618ba3a7c1 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 13:16:59 +0000 Subject: [PATCH 27/39] Add centroid, shape to corner utils --- ethology/detectors/ensembles/fusion.py | 39 ++++++++++++++++---------- ethology/detectors/ensembles/models.py | 12 ++++++-- ethology/detectors/ensembles/utils.py | 17 +++++++++++ 3 files changed, 50 insertions(+), 18 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 948ae776..7d2239fa 100644 --- a/ethology/detectors/ensembles/fusion.py +++ 
b/ethology/detectors/ensembles/fusion.py @@ -16,6 +16,10 @@ ValidBboxDetectionsEnsembleDataset, ) from ethology.validators.utils import _check_input, _check_output +from ethology.detectors.ensembles.utils import ( + centroid_shape_to_corners, + corners_to_centroid_shape, +) # ------------------- Supported fusion methods ------------------ # from ensemble_boxes @@ -69,7 +73,7 @@ def fuse_detections( fusion_method: TypeFusionMethod, fusion_method_kwargs: dict | None = None, max_n_detections: int | None = None, - n_workers: int | None = -1, + n_workers: int | None = -1, ) -> xr.Dataset: """Fuse ensemble detections across models using the selected method. @@ -126,6 +130,7 @@ def fuse_detections( # ------- Multi image fusion ------------------ + @_check_output(ValidBboxDetectionsDataset) def _postprocess_multi_image_fused_arrays( results_per_img_id: list[TupleFourDataArrays], @@ -176,7 +181,7 @@ def _validate_image_shape(image_shape) -> np.ndarray: @_check_input(ValidBboxDetectionsEnsembleDataset) def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int: """Get upper bound for maximum number of boxes per image after fusion. - + We assume no detections are fused and all images have as many detections as the maximum number of non-nan detections per image. 
""" @@ -246,8 +251,9 @@ def _preprocess_single_image_detections( """Prepare detections of an ensemble on a single image for fusion.""" # Prepare boxes array # transform position and shape arrays to x1y1x2y normalised - bboxes_x1y1 = (position - shape / 2) / image_width_height[:, None, None] - bboxes_x2y2 = (position + shape / 2) / image_width_height[:, None, None] + x1y1, x2y2 = centroid_shape_to_corners(position, shape) + bboxes_x1y1 = x1y1 / image_width_height[:, None, None] + bboxes_x2y2 = x2y2 / image_width_height[:, None, None] bboxes_x1y1_x2y2_normalised = np.transpose( np.concat( [bboxes_x1y1, bboxes_x2y2] @@ -259,7 +265,7 @@ def _preprocess_single_image_detections( # Get list of bboxes per model # arrays need to be tall for fusion methods n_models = bboxes_x1y1_x2y2_normalised.shape[-1] - list_bboxes_per_model = [ + list_x1y1_x2y2_norm_per_model = [ arr.squeeze() for arr in np.split(bboxes_x1y1_x2y2_normalised, n_models, axis=-1) ] @@ -273,12 +279,15 @@ def _preprocess_single_image_detections( # Remove rows with nan coordinates and return lists of arrays list_non_nan_bboxes_per_model = [ - sum(~np.any(np.isnan(arr), axis=1)) for arr in list_bboxes_per_model - ] + sum(~np.any(np.isnan(arr), axis=1)) + for arr in list_x1y1_x2y2_norm_per_model + ] return ( - _chop_end_of_array(list_arrays_per_model, list_non_nan_bboxes_per_model) + _chop_end_of_array( + list_arrays_per_model, list_non_nan_bboxes_per_model + ) for list_arrays_per_model in [ - list_bboxes_per_model, + list_x1y1_x2y2_norm_per_model, list_confidence_per_model, list_label_per_model, ] @@ -361,8 +370,10 @@ def _parse_single_image_detections_as_dataarrays( n_detections = x1y1_x2y2_array.shape[0] id_array = np.arange(n_detections) - # Extract bbox corner coordinates - x1y1, x2y2 = x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4] + # Extract bbox centre and shape + centroid, shape = corners_to_centroid_shape( + x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4] + ) # Shared coordinates id_coords = {"id": 
id_array} @@ -371,13 +382,11 @@ def _parse_single_image_detections_as_dataarrays( # Build all DataArrays return ( xr.DataArray( - (0.5 * (x1y1 + x2y2)).T, + centroid.T, dims=["space", "id"], coords=spatial_id_coords, ), - xr.DataArray( - (x2y2 - x1y1).T, dims=["space", "id"], coords=spatial_id_coords - ), + xr.DataArray(shape.T, dims=["space", "id"], coords=spatial_id_coords), xr.DataArray(scores_array, dims=["id"], coords=id_coords), xr.DataArray(labels_array, dims=["id"], coords=id_coords), ) diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 03d20211..11fb5494 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -11,7 +11,10 @@ from lightning import LightningModule from torchvision.models import detection, get_model, list_models -from ethology.detectors.ensembles.utils import pad_to_max_first_dimension +from ethology.detectors.ensembles.utils import ( + corners_to_centroid_shape, + pad_to_max_first_dimension, +) from ethology.validators.detections import ValidBboxDetectionsEnsembleDataset from ethology.validators.utils import _check_output @@ -177,8 +180,11 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: # arrays of shape (image_id, 4/1, n_max_detections, n_models) # Compute centroid and shape arrays - centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4]) - shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2] + # centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4]) + # shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2] + centroid_array, shape_array = corners_to_centroid_shape( + bboxes_array[:, 0:2], bboxes_array[:, 2:4] + ) # Return as ethology detections dataset max_n_detections = bboxes_array.shape[-2] diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py index 0ab1e2f8..03ff3b2d 100644 --- a/ethology/detectors/ensembles/utils.py +++ 
b/ethology/detectors/ensembles/utils.py @@ -23,3 +23,20 @@ def pad_to_max_first_dimension(list_arrays, fill_value=np.nan): for arr in list_arrays ] return list_arrays_padded + + +def centroid_shape_to_corners(position, shape): + """Convert centroid and shape arrays to x1y1, x2y2 corner arrays.""" + half_shape = shape / 2 + return ( + position - half_shape, # x1y1 + position + half_shape, # x2y2 + ) + + +def corners_to_centroid_shape(x1y1, x2y2): + """Convert x1y1, x2y2 corner arrays to centroid and shape arrays.""" + return ( + 0.5 * (x1y1 + x2y2), # centroid + x2y2 - x1y1, # shape + ) From 8d9a6dfe15ce96053aea4b768dbc3d9d76654cf1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Thu, 4 Dec 2025 13:17:21 +0000 Subject: [PATCH 28/39] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- ethology/detectors/ensembles/fusion.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 7d2239fa..2b0801ac 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -11,15 +11,15 @@ from joblib import Parallel, delayed from tqdm import tqdm +from ethology.detectors.ensembles.utils import ( + centroid_shape_to_corners, + corners_to_centroid_shape, +) from ethology.validators.detections import ( ValidBboxDetectionsDataset, ValidBboxDetectionsEnsembleDataset, ) from ethology.validators.utils import _check_input, _check_output -from ethology.detectors.ensembles.utils import ( - centroid_shape_to_corners, - corners_to_centroid_shape, -) # ------------------- Supported fusion methods ------------------ # from ensemble_boxes From c3ab6891e6cf5747e4c770f6585f0ba4083c65d6 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:11:03 +0000 Subject: [PATCH 29/39] Make 
format_predictions a staticmethod --- ethology/detectors/ensembles/models.py | 29 ++- examples/ensemble_of_detectors.py | 333 +++++++++++++++++++++++++ 2 files changed, 356 insertions(+), 6 deletions(-) create mode 100644 examples/ensemble_of_detectors.py diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 11fb5494..1830fc71 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -9,6 +9,7 @@ import xarray as xr import yaml from lightning import LightningModule +from torch.nn.parallel import parallel_apply from torchvision.models import detection, get_model, list_models from ethology.detectors.ensembles.utils import ( @@ -112,10 +113,19 @@ def predict_step(self, batch, batch_idx): """Predict step for a single batch.""" # Run all models in ensemble in GPU images_batch, _annotations_batch = batch + # # ----------------------------------- raw_prediction_dicts_per_model = [ model(images_batch) for model in self.list_models ] # [num_models][batch_size] + # Run all models in parallel on this GPU + # inputs = [(images_batch,)] * len(self.list_models[:3]) + # raw_prediction_dicts_per_model = parallel_apply( + # modules=self.list_models, #----- + # inputs=[(images_batch,)] * len(self.list_models), + # ) + # # ----------------------------------- + # Transpose to [batch_size][num_models] for easier downstream # processing raw_prediction_dicts_per_sample = [ @@ -127,16 +137,23 @@ def predict_step(self, batch, batch_idx): return raw_prediction_dicts_per_sample + @staticmethod @_check_output(ValidBboxDetectionsEnsembleDataset) - def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: - """Format as ethology detections dataset with model axis.""" + def format_predictions( + predictions: list[dict], attrs: dict | None = None + ) -> xr.Dataset: + """Format as ethology detections dataset with model axis. 
+ + predictions: raw_predictions_per_model + """ # Get results from trainer - raw_predictions_per_model = self.trainer.predict_loop.predictions + # raw_predictions_per_model = self.trainer.predict_loop.predictions # Flatten batches raw_prediction_dicts_per_sample = list( - chain.from_iterable(raw_predictions_per_model) + chain.from_iterable(predictions) ) # [sample][model] + n_models = len(raw_prediction_dicts_per_sample[0]) # Parse output from dicts output_per_sample: dict[str, list] = { @@ -146,7 +163,7 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: } for ky in output_per_sample: output_per_sample[ky] = [ - [sample[m][ky] for m in range(len(self.list_models))] + [sample[m][ky] for m in range(n_models)] for sample in raw_prediction_dicts_per_sample ] # [sample][model] @@ -204,7 +221,7 @@ def format_predictions(self, attrs: dict | None = None) -> xr.Dataset: "image_id": np.arange(n_images), "space": ["x", "y"], "id": np.arange(max_n_detections), - "model": np.arange(len(self.list_models)), + "model": np.arange(n_models), }, attrs=attrs if attrs else {}, ) diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py new file mode 100644 index 00000000..86911633 --- /dev/null +++ b/examples/ensemble_of_detectors.py @@ -0,0 +1,333 @@ +"""Evaluating ensemble of trained detectors.""" +# %% +# imports + +from pathlib import Path + +import numpy as np +import torch +import torchvision.transforms.v2 as transforms +import xarray as xr +import yaml +from lightning import Trainer +from matplotlib import pyplot as plt +from torch.utils.data import DataLoader +from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 + +from ethology.detectors.ensembles.fusion import fuse_detections +from ethology.detectors.ensembles.models import EnsembleDetector +from ethology.detectors.evaluate import compute_precision_recall_ds +from ethology.io.annotations import load_bboxes + +# %% +# %matplotlib widget +# 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +# Helper functions +def create_coco_dataset( + images_dir: str | Path, + annotations_file: str | Path, + composed_transform: transforms.Compose, +) -> CocoDetection: + """Create a COCO dataset for object detection. + + Note: transforms are applied to the full dataset. If the dataset + is later split, all splits will have the same transforms. + """ + dataset_coco = CocoDetection( + root=images_dir, + annFile=annotations_file, + transforms=composed_transform, + ) + + # wrap dataset for transforms v2 + dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco) + + return dataset_transformed + + +def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: + """Collate function for dataloader with varying number of bounding boxes. + + A custom function is needed for detection + because the number of bounding boxes varies + between images of the same batch. + See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop + + Parameters + ---------- + batch : tuple + a sequence of length = batch size, made up of (image, annotations) + tuples (one per sample). + + Returns + ------- + tuple + a tuple of 2 tuples, the first one holding all images in the batch, + and the second one holding the corresponding annotations.
+ + """ + return tuple(zip(*batch, strict=True)) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Input data + +dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full") +images_dir = dataset_dir / "frames" +annotations_dir = dataset_dir / "annotations" +annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json" + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Define a dataloader +# Define transforms for inference +inference_transforms = transforms.Compose( + [ + transforms.ToImage(), + transforms.ToDtype(torch.float32, scale=True), + ] +) + +# Create COCO dataset +# TODO: convert from ethology detections dataset to COCO dataset +# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") +dataset_coco = create_coco_dataset( + images_dir=Path(dataset_dir) / "frames", + annotations_file=annotations_file_path, + composed_transform=inference_transforms, +) + +# dataloader +dataloader = DataLoader( + dataset_coco, + batch_size=12, # 12, + shuffle=False, + num_workers=8, # 4 + collate_fn=collate_fn_varying_n_bboxes, + persistent_workers=True, + # pin_memory=True, # <-- Faster CPU->GPU transfer + # because we guarantee a physical address for the data + # in memory, so we can use DMA that directly takes it to + # the GPU + # prefetch_factor=4, # <-- Prefetch more batches + # multiprocessing_context="fork" + # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available() + # else None, # see https://github.com/pytorch/pytorch/issues/87688 +) + +# %% +# TODO: dataloader to ethology detections dataset +gt_bboxes_ds = load_bboxes.from_files( + annotations_file_path, format="COCO", images_dirs=images_dir +) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Define a YAML config file for the ensemble of trained detectors +experiment_ID = "617393114420881798" +ml_runs_experiment_dir = ( + 
Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID +) +last_ckpt = Path("checkpoints") / "last.ckpt" + +config = { + "models": { + "model_class": "fasterrcnn_resnet50_fpn_v2", + # imported from torchvision.models.detection + "model_kwargs": { + "num_classes": 2, + "weights": None, # null in YAML becomes None in Python + "weights_backbone": None, + }, + "checkpoints": [ + str( + ml_runs_experiment_dir + / "f348d9d196934073bece1b877cbc4d38" + / last_ckpt + ), # above_0th + str( + ml_runs_experiment_dir + / "879d2f77e2b24adcb06b87d2fede6a04" + / last_ckpt + ), # above_1st + str( + ml_runs_experiment_dir + / "75583ec227e3444ab692b99c64795325" + / last_ckpt + ), # above_5th + str( + ml_runs_experiment_dir + / "4acc37206b1e4f679d535c837bee2c2f" + / last_ckpt + ), # above_10th + str( + ml_runs_experiment_dir + / "fdcf88fcbcc84fbeb94b45ca6b6f8914" + / last_ckpt + ), # above_25th + str( + ml_runs_experiment_dir + / "daa05ded0ea047388c9134bf044061c5" + / last_ckpt + ), # above_50th + ], + }, + "fusion": { + "method": "weighted_boxes_fusion", + # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted" + "method_kwargs": { + # arguments as in ensemble_boxes.weighted_boxes_fusion + "iou_thr": 0.5, # iou threshold for the ensemble + "skip_box_thr": 0.0001, + }, + "n_jobs": -1, # workers for joblib.Parallel, + # n_workers should be <= number of CPU cores + # follows joblib n_jobs + # if -1: all are used + # if None: same as 1 + # "confidence_threshold_post_fusion": 0.0, + "max_n_detections": 300, + }, +} +config_file = "ensemble_of_detectors.yaml" +with open(config_file, "w") as f: + yaml.dump(config, f, sort_keys=False) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Load the ensemble of detectors +ensemble_detector = EnsembleDetector(config_file) +print(f"Ensemble detector is on device: {ensemble_detector.device}") + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Run the 
ensemble of detectors on a dataset +# Use Trainer for inference (this sets the device flexibly) + +# With multiple devices: +# Lightning handles the "main" device (so still device=1), +# while code internally distributes models across GPUs using parallel_apply. +trainer = Trainer( + accelerator="gpu", + devices=1, + logger=False, + precision="16-mixed", # --- results change + # strategy = 'ddp' ? +) +predictions = trainer.predict(ensemble_detector, dataloader) + + +# %% +# Format predictions as ethology detections dataset and add attrs +# TODO: think about syntax of format_predictions (should it be instance or +# static method instead?) +ensemble_detections_ds = ensemble_detector.format_predictions( + predictions=predictions, + attrs=gt_bboxes_ds.attrs +) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Fuse detections across models using selected method +image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] +ensemble_detections_ds.attrs["image_shape"] = image_width_height +config_fusion: dict = config["fusion"] + + +fused_detections_ds = fuse_detections( + ensemble_detections_ds, + fusion_method=config_fusion["method"], + fusion_method_kwargs=config_fusion["method_kwargs"], + # n_workers=config_fusion.get("n_jobs", 1), + # max_n_detections=config_fusion["max_n_detections"], + # should be larger than expected maximum number of detections after fusion + # ---- method kwargs ---- +) + +# %% + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Fuse detections across models with NMS + +# fused_detections_nms_ds = fuse_ensemble_detections( +# ensemble_detections_ds, +# fusion_method="soft_nms", +# fusion_method_kwargs={ +# "iou_thr": config_fusion["method_kwargs"]["iou_thr"], +# "sigma": 0.5, +# "thresh": 0.001, +# }, +# max_n_detections=500, +# ) + +# fused_detections_ds = fused_detections_nms_ds +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Remove low confidence detections 
+confidence_threshold_post_fusion = 0.4 +fused_detections_ds_ = fused_detections_ds.where( + fused_detections_ds.confidence >= confidence_threshold_post_fusion +) + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Evaluate the ensemble model +# - load ground truth +# - compute metrics + +# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") + +iou_threshold_tp = 0.25 +fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds( + pred_bboxes_ds=fused_detections_ds_, + gt_bboxes_ds=gt_bboxes_ds, + iou_threshold=iou_threshold_tp, +) + +# All models on full August dataset, without removing low +# confidence detections: +# confidence_threshold_post_fusion = 0.0 +# Precision: 0.5920 +# Recall: 0.8455 +# --- +# confidence_threshold_post_fusion = 0.4 +# Precision: 0.8339 +# Recall: 0.7177 +# --- +# confidence_threshold_post_fusion = 0.5 +# Precision: 0.8714 +# Recall: 0.6624 +# --- +# confidence threshold post fusion: 0.40 AND mixed precision in trainer +# Precision: 0.8336 +# Recall: 0.7162 + +print( + "Ensemble model with confidence threshold post fusion: " + f"{confidence_threshold_post_fusion:.2f}" +) +print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}") +print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}") + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +# Evaluate single models +list_detections_ds_eval = [] +for k in range(ensemble_detections_ds.sizes["model"]): + # filter low confidence detections (for a fairer comparison) + detections_one_model = ensemble_detections_ds.where( + ensemble_detections_ds.confidence >= confidence_threshold_post_fusion + ).sel(model=k) + + # evaluate + detections_ds, _ = compute_precision_recall_ds( + pred_bboxes_ds=detections_one_model, + gt_bboxes_ds=gt_bboxes_ds, + iou_threshold=iou_threshold_tp, + ) + list_detections_ds_eval.append(detections_ds) + + print(f"Model: {k}") + print(f"Precision: 
{detections_ds.precision.mean().values:.4f}") + print(f"Recall: {detections_ds.recall.mean().values:.4f}") + print("--------------------------------") + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From d1532a883cfbf838a3546b420a8f5194c5bf6b0c Mon Sep 17 00:00:00 2001 From: Niko Sirmpilatze Date: Wed, 26 Nov 2025 19:15:04 +0000 Subject: [PATCH 30/39] Update supported Python versions to 3.11 - 3.13 (#120) * update supported Python version to 3.11 - 3.13 * Use default python version for docs build * Bring back Python version for build docs action --------- Co-authored-by: sfmig <33267254+sfmig@users.noreply.github.com> --- .github/workflows/docs_build_and_deploy.yml | 3 ++- .github/workflows/test_and_deploy.yml | 6 +++--- .pre-commit-config.yaml | 3 --- CONTRIBUTING.md | 2 +- README.md | 2 +- docs/source/environment.yml | 2 +- docs/source/installation.md | 2 +- pyproject.toml | 8 ++++---- 8 files changed, 13 insertions(+), 15 deletions(-) diff --git a/.github/workflows/docs_build_and_deploy.yml b/.github/workflows/docs_build_and_deploy.yml index a60ab166..82200ca4 100644 --- a/.github/workflows/docs_build_and_deploy.yml +++ b/.github/workflows/docs_build_and_deploy.yml @@ -37,11 +37,12 @@ jobs: steps: - uses: neuroinformatics-unit/actions/build_sphinx_docs@main with: - python-version: 3.12 + python-version: 3.13 # default for the action is 3.x use-make: true fetch-tags: true use-artifactci: lazy + deploy_sphinx_docs: name: Deploy Sphinx Docs needs: build_sphinx_docs diff --git a/.github/workflows/test_and_deploy.yml b/.github/workflows/test_and_deploy.yml index e23c718a..df909c7c 100644 --- a/.github/workflows/test_and_deploy.yml +++ b/.github/workflows/test_and_deploy.yml @@ -32,14 +32,14 @@ jobs: strategy: matrix: # Run all supported Python versions on linux - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.11", "3.12", "3.13"] os: [ubuntu-latest] # Include one windows and macos run include: - os: macos-latest - 
python-version: "3.11" + python-version: "3.13" - os: windows-latest - python-version: "3.11" + python-version: "3.13" steps: # Run tests diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 82f7b937..d5863f9c 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -55,6 +55,3 @@ repos: rev: v2.4.1 hooks: - id: codespell - additional_dependencies: - # tomli dependency can be removed when we drop support for Python 3.10 - - tomli diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 8be95ddc..a7c1d138 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -11,7 +11,7 @@ development environment. In the following, we assume you have To install `ethology` for development, first create and activate a `conda` environment: ```sh -conda create -n ethology-dev python=3.12 +conda create -n ethology-dev python=3.13 conda activate ethology-dev ``` diff --git a/README.md b/README.md index addf5f8f..cf9b6c1f 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ Mix-and-match computer vision tools for your animal behaviour analysis. 
Create a conda environment and install the package ```sh -conda create -n ethology-env python=3.12 -y +conda create -n ethology-env python=3.13 -y conda activate ethology-env pip install ethology ``` diff --git a/docs/source/environment.yml b/docs/source/environment.yml index 3c0494e6..1c1bd064 100644 --- a/docs/source/environment.yml +++ b/docs/source/environment.yml @@ -3,7 +3,7 @@ channels: - conda-forge dependencies: - - python=3.12 + - python=3.13 - pytables # - pip: # - ethology diff --git a/docs/source/installation.md b/docs/source/installation.md index 327f5dda..760912f0 100644 --- a/docs/source/installation.md +++ b/docs/source/installation.md @@ -21,7 +21,7 @@ git clone https://github.com/neuroinformatics-unit/ethology.git Then create a conda environment and install the package from source ```sh -conda create -n ethology-env python=3.12 -y +conda create -n ethology-env python=3.13 -y conda activate ethology-env cd ethology pip install . diff --git a/pyproject.toml b/pyproject.toml index ad9b345e..4a7b7b63 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -3,7 +3,7 @@ name = "ethology" authors = [{ name = "Adam Tyson", email = "code@adamltyson.com" }] description = "Data processing tools for animal behavioural analysis" readme = "README.md" -requires-python = ">=3.10.0" +requires-python = ">=3.11.0" dynamic = ["version"] license = { text = "BSD-3-Clause" } @@ -12,9 +12,9 @@ classifiers = [ "Development Status :: 2 - Pre-Alpha", "Programming Language :: Python", "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: 3.13", "Operating System :: OS Independent", "License :: OSI Approved :: BSD License", ] @@ -133,14 +133,14 @@ check-hidden = true [tool.tox] legacy_tox_ini = """ [tox] -envlist = py{310,311,312} +envlist = py{311,312,313} isolated_build = True [gh-actions] python = - 3.10: py310 3.11: py311 3.12: 
py312 + 3.13: py313 [testenv] extras = From 399bd246208b5126e3694a207ff0c79decfb46cb Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 1 Dec 2025 19:55:20 +0000 Subject: [PATCH 31/39] [pre-commit.ci] pre-commit autoupdate (#121) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit updates: - [github.com/astral-sh/ruff-pre-commit: v0.14.3 → v0.14.7](https://github.com/astral-sh/ruff-pre-commit/compare/v0.14.3...v0.14.7) - [github.com/pre-commit/mirrors-mypy: v1.18.2 → v1.19.0](https://github.com/pre-commit/mirrors-mypy/compare/v1.18.2...v1.19.0) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d5863f9c..b1c797fc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,12 +29,12 @@ repos: - id: rst-directive-colons - id: rst-inline-touching-normal - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.3 + rev: v0.14.7 hooks: - id: ruff - id: ruff-format - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.18.2 + rev: v1.19.0 hooks: - id: mypy additional_dependencies: From a10a96d622e3e3408db868892263f2b4a610a761 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:41:15 +0000 Subject: [PATCH 32/39] Make constant attributes in validators class variables (#123) * Classvars for validators WIP * Make defaults class variables also for file validators * Update docstrings * Add uv.lock to .gitignore * Clarify comments on class variables in validators to indicate they should not be modified after initialization. 
* Remove class attribute from docstring (not numpy style) * Fix excluded modules in API index generation script * Add preliminary tests --- .gitignore | 3 + docs/make_api_index.py | 2 +- ethology/validators/annotations.py | 78 +++++++------------ ethology/validators/detections.py | 45 +++++------ ethology/validators/utils.py | 42 +++++----- tests/test_unit/test_validators/test_utils.py | 65 ++++++++++++++++ 6 files changed, 141 insertions(+), 94 deletions(-) create mode 100644 tests/test_unit/test_validators/test_utils.py diff --git a/.gitignore b/.gitignore index 2a7466dd..53fa4926 100644 --- a/.gitignore +++ b/.gitignore @@ -78,6 +78,9 @@ venv/ # pyenv .python-version +# uv +uv.lock + # OS .DS_Store diff --git a/docs/make_api_index.py b/docs/make_api_index.py index 934a3886..64465e24 100644 --- a/docs/make_api_index.py +++ b/docs/make_api_index.py @@ -4,7 +4,7 @@ from pathlib import Path # Modules to exclude from the API index -exclude_modules = ["ethology.io.annotations.json_schemas"] +exclude_modules = ["ethology.validators.json_schemas"] # Set the current working directory to the directory of this script script_dir = Path(__file__).resolve().parent diff --git a/ethology/validators/annotations.py b/ethology/validators/annotations.py index 0ecb886c..427440c3 100644 --- a/ethology/validators/annotations.py +++ b/ethology/validators/annotations.py @@ -2,6 +2,7 @@ import json from pathlib import Path +from typing import ClassVar import pandas as pd import pandera.pandas as pa @@ -29,9 +30,9 @@ class ValidVIA: ---------- path : Path | str Path to the VIA JSON file, passed as an input. - schema : dict + schema : ClassVar[dict] The JSON schema is set to the default VIA schema. - required_keys : dict + required_keys : ClassVar[dict] The required keys for the VIA JSON file. 
Raises @@ -49,21 +50,15 @@ class ValidVIA: """ path: Path = field(converter=Path) - schema: dict = field( - default=_get_default_schema("VIA"), - init=False, - ) - required_keys: dict = field( - default={ - "main": ["_via_img_metadata", "_via_attributes"], - "images": ["filename"], - "regions": ["shape_attributes"], - "shape_attributes": ["x", "y", "width", "height"], - }, - init=False, - # with init=False the attribute is always initialized - # with the default value - ) + + # class variables: should not be modified after initialization + schema: ClassVar[dict] = _get_default_schema("VIA") + required_keys: ClassVar[dict] = { + "main": ["_via_img_metadata", "_via_attributes"], + "images": ["filename"], + "regions": ["shape_attributes"], + "shape_attributes": ["x", "y", "width", "height"], + } # Note: the validators are applied in order @path.validator @@ -121,9 +116,9 @@ class ValidCOCO: ---------- path : Path | str Path to the COCO JSON file, passed as an input. - schema : dict + schema : ClassVar[dict] The JSON schema is set to the default COCO schema. - required_keys : dict + required_keys : ClassVar[dict] The required keys for the COCO JSON file. 
Raises @@ -141,23 +136,15 @@ class ValidCOCO: """ path: Path = field(converter=Path) - schema: dict = field( - default=_get_default_schema("COCO"), - init=False, - # with init=False the attribute is always initialized - # with the default value - ) - # The keys of "required_keys" match the 1st level keys in a COCO JSON file - required_keys: dict = field( - default={ - "main": ["images", "annotations", "categories"], - "images": ["id", "file_name", "width", "height"], - "annotations": ["id", "image_id", "bbox", "category_id"], - "categories": ["id", "name"], # exclude "supercategory" - }, - init=False, - ) + # class variables: should not be modified after initialization + schema: ClassVar[dict] = _get_default_schema("COCO") + required_keys: ClassVar[dict] = { + "main": ["images", "annotations", "categories"], + "images": ["id", "file_name", "width", "height"], + "annotations": ["id", "image_id", "bbox", "category_id"], + "categories": ["id", "name"], # exclude "supercategory" + } # keys match the 1st level keys in a COCO JSON file # Note: the validators are applied in order @path.validator @@ -241,10 +228,10 @@ class ValidBboxAnnotationsDataset(ValidDataset): ---------- dataset : xarray.Dataset The xarray dataset to validate. - required_dims : set[str] + required_dims : ClassVar[set] The set of required dimension names: ``image_id``, ``space`` and ``id``. 
- required_data_vars : dict[str, set] + required_data_vars : ClassVar[dict[str, set]] A dictionary mapping data variable names to their required minimum dimensions: @@ -267,17 +254,12 @@ class ValidBboxAnnotationsDataset(ValidDataset): """ # Minimum requirements for a bbox dataset holding detections - required_dims: set = field( - default={"image_id", "space", "id"}, - init=False, - ) - required_data_vars: dict = field( - default={ - "position": {"image_id", "space", "id"}, - "shape": {"image_id", "space", "id"}, - }, - init=False, - ) + # Should not be modified after initialization + required_dims: ClassVar[set] = {"image_id", "space", "id"} + required_data_vars: ClassVar[dict[str, set]] = { + "position": {"image_id", "space", "id"}, + "shape": {"image_id", "space", "id"}, + } class ValidBboxAnnotationsDataFrame(pa.DataFrameModel): diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py index a22dab62..87268bea 100644 --- a/ethology/validators/detections.py +++ b/ethology/validators/detections.py @@ -1,6 +1,8 @@ """Validators for detection datasets.""" -from attrs import define, field +from typing import ClassVar + +from attrs import define from ethology.validators.utils import ValidDataset @@ -23,10 +25,10 @@ class ValidBboxDetectionsDataset(ValidDataset): ---------- dataset : xarray.Dataset The xarray dataset to validate. - required_dims : set + required_dims : ClassVar[set] The set of required dimension names: ``image_id``, ``space`` and ``id``. 
- required_data_vars : dict[str, set] + required_data_vars : ClassVar[dict[str, set]] A dictionary mapping data variable names to their required minimum dimensions: @@ -50,18 +52,13 @@ class ValidBboxDetectionsDataset(ValidDataset): """ # Minimum requirements for a bbox dataset holding detections - required_dims: set = field( - default={"image_id", "space", "id"}, - init=False, - ) - required_data_vars: dict = field( - default={ - "position": {"image_id", "space", "id"}, - "shape": {"image_id", "space", "id"}, - "confidence": {"image_id", "id"}, - }, - init=False, - ) + # Should not be modified after initialization + required_dims: ClassVar[set] = {"image_id", "space", "id"} + required_data_vars: ClassVar[dict[str, set]] = { + "position": {"image_id", "space", "id"}, + "shape": {"image_id", "space", "id"}, + "confidence": {"image_id", "id"}, + } @define @@ -110,15 +107,9 @@ class ValidBboxDetectionsEnsembleDataset(ValidDataset): """ # Minimum requirements for a bbox dataset holding detections - required_dims: set = field( - default={"image_id", "space", "id", "model"}, - init=False, - ) - required_data_vars: dict = field( - default={ - "position": {"image_id", "space", "id", "model"}, - "shape": {"image_id", "space", "id", "model"}, - "confidence": {"image_id", "id", "model"}, - }, - init=False, - ) + required_dims: ClassVar[set] = {"image_id", "space", "id", "model"} + required_data_vars: ClassVar[dict] = { + "position": {"image_id", "space", "id", "model"}, + "shape": {"image_id", "space", "id", "model"}, + "confidence": {"image_id", "id", "model"}, + } diff --git a/ethology/validators/utils.py b/ethology/validators/utils.py index ce74a289..ce85ff0a 100644 --- a/ethology/validators/utils.py +++ b/ethology/validators/utils.py @@ -1,8 +1,9 @@ """Utils for validating `ethology` objects.""" -from abc import ABC, abstractmethod +from abc import ABC from collections.abc import Callable from functools import wraps +from typing import ClassVar import xarray as xr from 
attrs import define, field @@ -20,18 +21,18 @@ class ValidDataset(ABC): - has the correct dimensions for each data variable Subclasses must define ``required_dims`` and ``required_data_vars`` - attributes. + class attributes. Attributes ---------- dataset : xarray.Dataset The xarray dataset to validate. - required_dims : set[str] - A set of required dimension names. This attribute should be + required_dims : ClassVar[set[str]] + A set of required dimension names. This class attribute must be defined by any subclass inheriting from this class. - required_data_vars : dict[str, set] + required_data_vars : ClassVar[dict[str, set]] A dictionary mapping data variable names to their required dimensions. - This attribute should be defined by any subclass inheriting from + This class attribute must be defined by any subclass inheriting from this class. Raises @@ -51,18 +52,23 @@ class ValidDataset(ABC): dataset: xr.Dataset = field() - # Subclasses should override these abstract properties - @property - @abstractmethod - def required_dims(self) -> set: - """Subclasses must provide a ``required_dims`` property.""" - pass # pragma: no cover - - @property - @abstractmethod - def required_data_vars(self) -> dict[str, set]: - """Subclasses must provide a ``required_data_vars`` property.""" - pass # pragma: no cover + # class variables + required_dims: ClassVar[set] + required_data_vars: ClassVar[dict[str, set]] + + def __init_subclass__(cls, **kwargs): + """Verify that subclasses define required class variables.""" + super().__init_subclass__(**kwargs) + + if not hasattr(cls, "required_dims"): + raise TypeError( + f"{cls.__name__} must define 'required_dims' class variable" + ) + if not hasattr(cls, "required_data_vars"): + raise TypeError( + f"{cls.__name__} must define 'required_data_vars' " + "class variable" + ) # Validators @dataset.validator diff --git a/tests/test_unit/test_validators/test_utils.py b/tests/test_unit/test_validators/test_utils.py new file mode 100644 index 
00000000..ac91ddaa --- /dev/null +++ b/tests/test_unit/test_validators/test_utils.py @@ -0,0 +1,65 @@ +import pytest +from attrs import define + +from ethology.validators.utils import ValidDataset + + +@pytest.mark.parametrize( + "missing_attr, expected_error_match", + [ + ( + "required_dims", + ".*must define 'required_dims' class variable", + ), + ( + "required_data_vars", + ".*must define 'required_data_vars' class variable", + ), + ( + "both", + ".*must define 'required_dims' class variable", + ), + ], + ids=[ + "missing_required_dims", + "missing_required_data_vars", + "missing_both_class_vars", + ], +) +def test_subclass_missing_class_vars_raises_type_error( + missing_attr, expected_error_match +): + """Test that subclasses without required class vars raise TypeError.""" + with pytest.raises(TypeError, match=expected_error_match): + if missing_attr == "required_dims": + + @define + class InvalidDataset(ValidDataset): + required_data_vars = {"position": {"x", "y"}} + + elif missing_attr == "required_data_vars": + + @define + class InvalidDataset(ValidDataset): + required_dims = {"x", "y"} + + else: + + @define + class InvalidDataset(ValidDataset): + pass + + +def test_subclass_with_both_class_vars_does_not_raise(): + """Test that a valid subclass with both class vars works correctly.""" + required_dims_in = {"x", "y"} + required_data_vars_in = {"position": {"x", "y"}} + + @define + class ValidCustomDataset(ValidDataset): + required_dims = required_dims_in + required_data_vars = required_data_vars_in + + # Verify the class attributes + assert ValidCustomDataset.required_dims == required_dims_in + assert ValidCustomDataset.required_data_vars == required_data_vars_in From 92028667db9da543d443dad882830ff49af578d0 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:25:10 +0000 Subject: [PATCH 33/39] Expand validator tests for ensemble validator --- ethology/validators/detections.py | 9 +- 
.../test_validators/test_detections.py | 238 ++++++++++++------ 2 files changed, 162 insertions(+), 85 deletions(-) diff --git a/ethology/validators/detections.py b/ethology/validators/detections.py index 87268bea..a91be7a9 100644 --- a/ethology/validators/detections.py +++ b/ethology/validators/detections.py @@ -63,7 +63,7 @@ class ValidBboxDetectionsDataset(ValidDataset): @define class ValidBboxDetectionsEnsembleDataset(ValidDataset): - """Class for valid ``ethology`` bounding box ensembledetections datasets. + """Class for valid ``ethology`` bounding box ensemble detections datasets. This class validates that the input dataset: @@ -80,10 +80,10 @@ class ValidBboxDetectionsEnsembleDataset(ValidDataset): ---------- dataset : xarray.Dataset The xarray dataset to validate. - required_dims : set + required_dims : ClassVar[set] The set of required dimension names: ``image_id``, ``space``, ``id`` - and ``model``. - required_data_vars : dict[str, set] + and ``model``. + required_data_vars : ClassVar[dict[str, set]] A dictionary mapping data variable names to their required minimum dimensions: @@ -107,6 +107,7 @@ class ValidBboxDetectionsEnsembleDataset(ValidDataset): """ # Minimum requirements for a bbox dataset holding detections + # Should not be modified after initialization required_dims: ClassVar[set] = {"image_id", "space", "id", "model"} required_data_vars: ClassVar[dict] = { "position": {"image_id", "space", "id", "model"}, diff --git a/tests/test_unit/test_validators/test_detections.py b/tests/test_unit/test_validators/test_detections.py index d053d6ef..5a60da56 100644 --- a/tests/test_unit/test_validators/test_detections.py +++ b/tests/test_unit/test_validators/test_detections.py @@ -4,7 +4,10 @@ import pytest import xarray as xr -from ethology.validators.detections import ValidBboxDetectionsDataset +from ethology.validators.detections import ( + ValidBboxDetectionsDataset, + ValidBboxDetectionsEnsembleDataset, +) @pytest.fixture @@ -38,6 +41,28 @@ def 
valid_bbox_detections_dataset(): return ds +@pytest.fixture +def valid_bbox_detections_ensemble_dataset(valid_bbox_detections_dataset): + """Create a valid bbox detections ensemble_dataset for validation.""" + # Add model dimension + ds = valid_bbox_detections_dataset.expand_dims( + model=["model_a", "model_b"] + ) + + return ds + + +@pytest.fixture +def valid_bbox_detections_ensemble_dataset_extra_vars_and_dims( + valid_bbox_detections_ensemble_dataset: xr.Dataset, +) -> xr.Dataset: + ds = valid_bbox_detections_ensemble_dataset.copy(deep=True) + ds.coords["extra_dim"] = [10, 20, 30] + ds["extra_var_1"] = (["image_id"], np.random.rand(len(ds.image_id))) + ds["extra_var_2"] = (["id"], np.random.rand(len(ds.id))) + return ds + + @pytest.fixture def valid_bbox_detections_dataset_extra_vars_and_dims( valid_bbox_detections_dataset: xr.Dataset, @@ -49,44 +74,71 @@ def valid_bbox_detections_dataset_extra_vars_and_dims( return ds +# Define validator configurations +VALIDATOR_CONFIGS: dict = { + "detections_ds": { + "validator_class": ValidBboxDetectionsDataset, + "valid_fixture": "valid_bbox_detections_dataset", + "valid_fixture_extra": ( + "valid_bbox_detections_dataset_extra_vars_and_dims" + ), + "required_dims": {"image_id", "space", "id"}, + "required_data_vars": { + "position": {"image_id", "space", "id"}, + "shape": {"image_id", "space", "id"}, + "confidence": {"image_id", "id"}, + }, + }, + "ensemble_ds": { + "validator_class": ValidBboxDetectionsEnsembleDataset, + "valid_fixture": "valid_bbox_detections_ensemble_dataset", + "valid_fixture_extra": ( + "valid_bbox_detections_ensemble_dataset_extra_vars_and_dims" + ), + "required_dims": {"image_id", "space", "id", "model"}, + "required_data_vars": { + "position": {"image_id", "space", "id", "model"}, + "shape": {"image_id", "space", "id", "model"}, + "confidence": {"image_id", "id", "model"}, + }, + }, +} + + +@pytest.mark.parametrize("validator_type", ["detections_ds", "ensemble_ds"]) +@pytest.mark.parametrize( + 
"valid_fixture_key", + [ + "valid_fixture", + "valid_fixture_extra", + ], +) +def test_validator_bbox_detections_dataset_valid( + validator_type: str, + valid_fixture_key: str, + request: pytest.FixtureRequest, +): + """Test bbox detections dataset validation with valid datasets.""" + config = VALIDATOR_CONFIGS[validator_type] + fixture_name = config[valid_fixture_key] + dataset = request.getfixturevalue(fixture_name) + + validator_class = config["validator_class"] + with does_not_raise(): + validator = validator_class(dataset=dataset) + + assert validator.dataset is dataset + assert validator.required_dims == config["required_dims"] + assert validator.required_data_vars == config["required_data_vars"] + + +@pytest.mark.parametrize( + "validator", + [ValidBboxDetectionsDataset, ValidBboxDetectionsEnsembleDataset], +) @pytest.mark.parametrize( "sample_dataset, expected_exception, expected_error_message", [ - ( - "valid_bbox_detections_dataset", - does_not_raise(), - "", - ), - ( - "valid_bbox_detections_dataset_extra_vars_and_dims", - does_not_raise(), - "", - ), - ( - xr.Dataset( - coords={ - "image_id": np.arange(3), - "space": np.arange(2), - "id": np.arange(2), - }, - data_vars={ - "position": ( - ["image_id", "space", "id"], - np.zeros((3, 2, 2)), - ), - "shape": ( - ["image_id", "space", "id", "foo"], - np.zeros((3, 2, 2, 1)), - ), - "confidence": ( - ["image_id", "id"], - np.zeros((3, 2)), - ), - }, - ), - does_not_raise(), - "", - ), ( {"position": [1, 2, 3], "shape": [4, 5, 6]}, pytest.raises(TypeError), @@ -130,13 +182,56 @@ def valid_bbox_detections_dataset_extra_vars_and_dims( pytest.raises(ValueError), "Missing required data variables: ['confidence', 'shape']", ), + ], + ids=[ + "invalid_type", + "invalid_missing_data_var", + "invalid_missing_multiple_data_vars", + ], +) +def test_validator_bbox_detections_dataset_invalid( + validator: type[ValidBboxDetectionsDataset] + | type[ValidBboxDetectionsEnsembleDataset], + sample_dataset: xr.Dataset, + 
expected_exception: pytest.raises, + expected_error_message: str, +): + """Test bbox detections dataset validation with invalid inputs.""" + # Run validation and check exception + with expected_exception as excinfo: + _validator = validator(dataset=sample_dataset) + if excinfo: + error_msg = str(excinfo.value) + assert error_msg in expected_error_message + + +@pytest.mark.parametrize( + "validator", + [ValidBboxDetectionsDataset, ValidBboxDetectionsEnsembleDataset], +) +@pytest.mark.parametrize( + "sample_dataset, expected_exception, expected_error_message", + [ ( xr.Dataset( - coords={"image_id": np.arange(3), "id": np.arange(2)}, + coords={ + "image_id": np.arange(3), + "id": np.arange(2), + "model": np.arange(2), + }, data_vars={ - "position": (["image_id", "id"], np.zeros((3, 2))), - "shape": (["image_id", "id"], np.zeros((3, 2))), - "confidence": (["image_id", "id"], np.zeros((3, 2))), + "position": ( + ["image_id", "id", "model"], + np.zeros((3, 2, 2)), + ), + "shape": ( + ["image_id", "id", "model"], + np.zeros((3, 2, 2)), + ), + "confidence": ( + ["image_id", "id", "model"], + np.zeros((3, 2, 2)), + ), }, ), pytest.raises(ValueError), @@ -148,19 +243,20 @@ def valid_bbox_detections_dataset_extra_vars_and_dims( "foo": np.arange(3), "bar": ["x", "y"], "id": np.arange(2), + "model": np.arange(2), }, data_vars={ "position": ( - ["foo", "bar", "id"], - np.zeros((3, 2, 2)), + ["foo", "bar", "id", "model"], + np.zeros((3, 2, 2, 2)), ), "shape": ( - ["foo", "bar", "id"], - np.zeros((3, 2, 2)), + ["foo", "bar", "id", "model"], + np.zeros((3, 2, 2, 2)), ), "confidence": ( - ["foo", "id"], - np.zeros((3, 2)), + ["foo", "id", "model"], + np.zeros((3, 2, 2)), ), }, ), @@ -173,19 +269,20 @@ def valid_bbox_detections_dataset_extra_vars_and_dims( "image_id": np.arange(3), "space": np.arange(2), "id": np.arange(2), + "model": np.arange(2), }, data_vars={ "position": ( - ["image_id", "space", "id"], - np.zeros((3, 2, 2)), + ["image_id", "space", "id", "model"], + 
np.zeros((3, 2, 2, 2)), ), "shape": ( - ["image_id", "id"], - np.zeros((3, 2)), + ["image_id", "id", "model"], + np.zeros((3, 2, 2)), ), "confidence": ( - ["image_id", "id"], - np.zeros((3, 2)), + ["image_id", "id", "model"], + np.zeros((3, 2, 2)), ), }, ), @@ -197,42 +294,21 @@ def valid_bbox_detections_dataset_extra_vars_and_dims( ), ], ids=[ - "valid_bbox_detections", - "valid_bbox_detections_extra_vars_and_dims", - "valid_bbox_detections_extra_dims_in_shape_var", - "invalid_bbox_detections_type", - "invalid_bbox_detections_dataset_missing_data_var", - "invalid_bbox_detections_missing_multiple_data_vars", - "invalid_bbox_detections_missing_dimension", - "invalid_bbox_detections_missing_multiple_dimensions", - "invalid_bbox_detections_missing_dimension_in_data_var", + "invalid_missing_dimension", + "invalid_missing_multiple_dimensions", + "invalid_missing_dimension_in_data_var", ], ) -def test_validator_bbox_detections_dataset( - sample_dataset: str | dict, +def test_validator_bbox_detections_dataset_missing_dims( + validator: type[ValidBboxDetectionsDataset] + | type[ValidBboxDetectionsEnsembleDataset], + sample_dataset: xr.Dataset, expected_exception: pytest.raises, expected_error_message: str, - request: pytest.FixtureRequest, ): - """Test bbox annotations dataset validation in various input scenarios.""" - # Get dataset to validate - if isinstance(sample_dataset, str): - dataset = request.getfixturevalue(sample_dataset) - else: - dataset = sample_dataset - # Run validation and check exception with expected_exception as excinfo: - validator = ValidBboxDetectionsDataset(dataset=dataset) - + _validator = validator(dataset=sample_dataset) if excinfo: error_msg = str(excinfo.value) assert error_msg in expected_error_message - else: - assert validator.dataset is dataset - assert validator.required_dims == {"image_id", "space", "id"} - assert validator.required_data_vars == { - "position": {"image_id", "space", "id"}, - "shape": {"image_id", "space", "id"}, - 
"confidence": {"image_id", "id"}, - } From 3aa7f4368c0760715335f9f6105fe2c7a543a622 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:48:43 +0000 Subject: [PATCH 34/39] Start test for utils --- .../test_detectors_ensembles/test_utils.py | 66 +++++++++++++++++++ 1 file changed, 66 insertions(+) create mode 100644 tests/test_unit/test_detectors_ensembles/test_utils.py diff --git a/tests/test_unit/test_detectors_ensembles/test_utils.py b/tests/test_unit/test_detectors_ensembles/test_utils.py new file mode 100644 index 00000000..c88199af --- /dev/null +++ b/tests/test_unit/test_detectors_ensembles/test_utils.py @@ -0,0 +1,66 @@ +import numpy as np +import pytest + +from ethology.detectors.ensembles.utils import ( + _centroid_shape_to_corners, + _corners_to_centroid_shape, + _get_padding_width, + _pad_to_max_first_dimension, +) + + +def test_get_padding_width(): + pass + + +@pytest.mark.parametrize( + "fill_value", + [ + np.nan, + np.inf, + 42 + ], +) +def test_pad_to_max_first_dimension(fill_value): + """Test padding all arrays in list along first dimension.""" + # Get max array length + list_arrays = [np.zeros((1, 2, 3)), np.zeros((10, 2, 3))] + max_array_length = max([arr.shape[0] for arr in list_arrays]) + + # Pad + list_arrays_padded = _pad_to_max_first_dimension(list_arrays, fill_value) + + # Assert all same length + assert all( + [arr.shape[0] == max_array_length for arr in list_arrays_padded] + ) + # Assert other dimensions stay the same + assert all( + [ + arr.shape[1:] == arr_input.shape[1:] + for arr, arr_input in zip( + list_arrays_padded, list_arrays, strict=True + ) + ] + ) + # Assert padding value + assert all( + [ + np.allclose( + arr[arr_input.shape[0]:], + np.full_like(arr[arr_input.shape[0]:], fill_value), + equal_nan=True, + ) + for arr, arr_input in zip( + list_arrays_padded, list_arrays, strict=True + ) + ] + ) + + +def test_centroid_shape_to_corners(): + pass + + +def 
test_corners_to_centroid_shape(): + pass From b18a3e578992c8aa408360c7e609511d5f488a88 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:08:32 +0000 Subject: [PATCH 35/39] Make utils private --- ethology/detectors/ensembles/fusion.py | 8 ++++---- ethology/detectors/ensembles/models.py | 10 +++++----- ethology/detectors/ensembles/utils.py | 24 +++++++++++++++++------- 3 files changed, 26 insertions(+), 16 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 2b0801ac..3452e37a 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -12,8 +12,8 @@ from tqdm import tqdm from ethology.detectors.ensembles.utils import ( - centroid_shape_to_corners, - corners_to_centroid_shape, + _centroid_shape_to_corners, + _corners_to_centroid_shape, ) from ethology.validators.detections import ( ValidBboxDetectionsDataset, @@ -251,7 +251,7 @@ def _preprocess_single_image_detections( """Prepare detections of an ensemble on a single image for fusion.""" # Prepare boxes array # transform position and shape arrays to x1y1x2y normalised - x1y1, x2y2 = centroid_shape_to_corners(position, shape) + x1y1, x2y2 = _centroid_shape_to_corners(position, shape) bboxes_x1y1 = x1y1 / image_width_height[:, None, None] bboxes_x2y2 = x2y2 / image_width_height[:, None, None] bboxes_x1y1_x2y2_normalised = np.transpose( @@ -371,7 +371,7 @@ def _parse_single_image_detections_as_dataarrays( id_array = np.arange(n_detections) # Extract bbox centre and shape - centroid, shape = corners_to_centroid_shape( + centroid, shape = _corners_to_centroid_shape( x1y1_x2y2_array[:, 0:2], x1y1_x2y2_array[:, 2:4] ) diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index 1830fc71..ad6fe51d 100644 --- a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -13,8 +13,8 @@ from torchvision.models 
import detection, get_model, list_models from ethology.detectors.ensembles.utils import ( - corners_to_centroid_shape, - pad_to_max_first_dimension, + _corners_to_centroid_shape, + _pad_to_max_first_dimension, ) from ethology.validators.detections import ValidBboxDetectionsEnsembleDataset from ethology.validators.utils import _check_output @@ -173,11 +173,11 @@ def format_predictions( ky: [] for ky in output_per_sample } for ky in output_per_sample_padded: - output_per_sample_padded[ky] = pad_to_max_first_dimension( + output_per_sample_padded[ky] = _pad_to_max_first_dimension( [ # pad across models np.stack( - pad_to_max_first_dimension( + _pad_to_max_first_dimension( output_one_sample, fill_value[ky] ), axis=-1, @@ -199,7 +199,7 @@ def format_predictions( # Compute centroid and shape arrays # centroid_array = 0.5 * (bboxes_array[:, 0:2] + bboxes_array[:, 2:4]) # shape_array = bboxes_array[:, 2:4] - bboxes_array[:, 0:2] - centroid_array, shape_array = corners_to_centroid_shape( + centroid_array, shape_array = _corners_to_centroid_shape( bboxes_array[:, 0:2], bboxes_array[:, 2:4] ) diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py index 03ff3b2d..ab1a757c 100644 --- a/ethology/detectors/ensembles/utils.py +++ b/ethology/detectors/ensembles/utils.py @@ -3,20 +3,20 @@ import numpy as np -def get_padding_width(array, max_n): +def _get_padding_width(array, max_n): """Get pad width for array to max_n detections in the first dimension.""" pad_width = array.ndim * [(0, 0)] pad_width[0] = (0, max_n - array.shape[0]) # before, after return pad_width -def pad_to_max_first_dimension(list_arrays, fill_value=np.nan): +def _pad_to_max_first_dimension(list_arrays, fill_value=np.nan): """Pad arrays in list to maximum size of their first dimension.""" max_n_detections = max(array.shape[0] for array in list_arrays) list_arrays_padded = [ np.pad( arr, - get_padding_width(arr, max_n_detections), + _get_padding_width(arr, max_n_detections), 
mode="constant", constant_values=fill_value, ) @@ -25,8 +25,15 @@ def pad_to_max_first_dimension(list_arrays, fill_value=np.nan): return list_arrays_padded -def centroid_shape_to_corners(position, shape): - """Convert centroid and shape arrays to x1y1, x2y2 corner arrays.""" +def _centroid_shape_to_corners(position, shape): + """Convert centroid and shape arrays to x1y1, x2y2 corner arrays. + + x1y1 is the top left corner (min x-coordinate, min y-coordinate), + x2y2 is the bottom right corner (max x-coordinate, max y-coordinate) + of the bounding box. + + Space dimension is assumed to be the second dimension. + """ half_shape = shape / 2 return ( position - half_shape, # x1y1 @@ -34,8 +41,11 @@ def centroid_shape_to_corners(position, shape): ) -def corners_to_centroid_shape(x1y1, x2y2): - """Convert x1y1, x2y2 corner arrays to centroid and shape arrays.""" +def _corners_to_centroid_shape(x1y1, x2y2): + """Convert x1y1, x2y2 corner arrays to centroid and shape arrays. + + Space dimension is assumed to be the second dimension. 
+ """ return ( 0.5 * (x1y1 + x2y2), # centroid x2y2 - x1y1, # shape From 57333f676f86fcb618606ebd31424961e8bbf86a Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:08:49 +0000 Subject: [PATCH 36/39] add basic tests for utils --- tests/test_unit/test_datasets/__init__.py | 0 .../test_detectors_ensembles/__init__.py | 0 .../test_detectors_ensembles/test_utils.py | 77 +++++++++++++++---- tests/test_unit/test_validators/__init__.py | 0 4 files changed, 64 insertions(+), 13 deletions(-) create mode 100644 tests/test_unit/test_datasets/__init__.py create mode 100644 tests/test_unit/test_detectors_ensembles/__init__.py create mode 100644 tests/test_unit/test_validators/__init__.py diff --git a/tests/test_unit/test_datasets/__init__.py b/tests/test_unit/test_datasets/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_unit/test_detectors_ensembles/__init__.py b/tests/test_unit/test_detectors_ensembles/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test_unit/test_detectors_ensembles/test_utils.py b/tests/test_unit/test_detectors_ensembles/test_utils.py index c88199af..7835fcbd 100644 --- a/tests/test_unit/test_detectors_ensembles/test_utils.py +++ b/tests/test_unit/test_detectors_ensembles/test_utils.py @@ -9,17 +9,40 @@ ) -def test_get_padding_width(): - pass +@pytest.mark.parametrize( + "array, target_first_dim, expected_pad_width_first_dim", + [ + ( + np.zeros((3,)), + 5, + (0, 2), + ), # 1D array + ( + np.zeros((1, 2, 3)), + 4, + (0, 3), + ), # 3D array + ( + np.zeros((10, 2, 3)), + 10, + (0, 0), + ), # No padding needed + ], +) +def test_get_padding_width( + array, target_first_dim, expected_pad_width_first_dim +): + """Test getting padding width for arrays of different dimensions.""" + pad_width = _get_padding_width(array, target_first_dim) + + assert len(pad_width) == array.ndim + assert pad_width[0] == expected_pad_width_first_dim + assert all(pw == (0, 
0) for pw in pad_width[1:]) @pytest.mark.parametrize( "fill_value", - [ - np.nan, - np.inf, - 42 - ], + [np.nan, np.inf, 42], ) def test_pad_to_max_first_dimension(fill_value): """Test padding all arrays in list along first dimension.""" @@ -47,8 +70,8 @@ def test_pad_to_max_first_dimension(fill_value): assert all( [ np.allclose( - arr[arr_input.shape[0]:], - np.full_like(arr[arr_input.shape[0]:], fill_value), + arr[arr_input.shape[0] :], + np.full_like(arr[arr_input.shape[0] :], fill_value), equal_nan=True, ) for arr, arr_input in zip( @@ -58,9 +81,37 @@ def test_pad_to_max_first_dimension(fill_value): ) -def test_centroid_shape_to_corners(): - pass +@pytest.mark.parametrize( + "position, shape, expected_x1y1, expected_x2y2", + [ + ( + np.zeros((1, 2)), + np.array([[4, 2]]), + np.array([[-2, -1]]), + np.array([[2, 1]]), + ) + ], +) +def test_centroid_shape_to_corners( + position, shape, expected_x1y1, expected_x2y2 +): + x1y1, x2y2 = _centroid_shape_to_corners(position, shape) + np.testing.assert_array_equal(x1y1, expected_x1y1) + np.testing.assert_array_equal(x2y2, expected_x2y2) -def test_corners_to_centroid_shape(): - pass +@pytest.mark.parametrize( + "x1y1, x2y2, expected_position, expected_shape", + [ + ( + np.zeros((1, 2)), + np.ones((1, 2)), + np.array([[0.5, 0.5]]), + np.array([[1, 1]]), + ) + ], +) +def test_corners_to_centroid_shape(x1y1, x2y2, expected_position, expected_shape): + position, shape = _corners_to_centroid_shape(x1y1, x2y2) + np.testing.assert_array_equal(position, expected_position) + np.testing.assert_array_equal(shape, expected_shape) diff --git a/tests/test_unit/test_validators/__init__.py b/tests/test_unit/test_validators/__init__.py new file mode 100644 index 00000000..e69de29b From 1c0887529cba4c255974b3802a16fcbffb581394 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:22:48 +0000 Subject: [PATCH 37/39] pre-commit fixes --- ethology/detectors/ensembles/fusion.py | 27 
++++++++++--------- ethology/detectors/ensembles/models.py | 1 - ethology/detectors/ensembles/utils.py | 8 +++--- .../test_detectors_ensembles/test_utils.py | 4 ++- 4 files changed, 21 insertions(+), 19 deletions(-) diff --git a/ethology/detectors/ensembles/fusion.py b/ethology/detectors/ensembles/fusion.py index 3452e37a..d0dbfe71 100644 --- a/ethology/detectors/ensembles/fusion.py +++ b/ethology/detectors/ensembles/fusion.py @@ -142,7 +142,7 @@ def _postprocess_multi_image_fused_arrays( """ # Transpose results from list-of-tuples to tuple-of-lists da_names = ("position", "shape", "confidence", "label") - da_lists = zip(*results_per_img_id) + da_lists = zip(*results_per_img_id, strict=True) # Concatenate lists of dataarrays along image_id dimension and # remove extra padding in "id" dimension @@ -182,8 +182,8 @@ def _validate_image_shape(image_shape) -> np.ndarray: def _estimate_max_n_detections(ensemble_detections_ds: xr.Dataset) -> int: """Get upper bound for maximum number of boxes per image after fusion. - We assume no detections are fused and all images have as many detections as the maximum - number of non-nan detections per image. + We assume no detections are fused and all images have as many + detections as the maximum number of non-nan detections per image. 
""" detections_w_non_nan_position = ( ensemble_detections_ds.position.notnull().all(dim="space") @@ -209,7 +209,7 @@ def _fuse_single_image_detections( max_n_detections: int, **fusion_kwargs: Unpack[_TypeFusionMethodKwargs], # method-only kwargs ) -> TupleFourDataArrays: - """Fuse detections across models for a single image using selected method.""" + """Fuse detections for a single image with selected method.""" # Prepare single image arrays for fusion list_bboxes_per_model, list_confidence_per_model, list_label_per_model = ( _preprocess_single_image_detections( @@ -284,20 +284,21 @@ def _preprocess_single_image_detections( ] return ( _chop_end_of_array( - list_arrays_per_model, list_non_nan_bboxes_per_model - ) - for list_arrays_per_model in [ - list_x1y1_x2y2_norm_per_model, - list_confidence_per_model, - list_label_per_model, - ] + list_x1y1_x2y2_norm_per_model, list_non_nan_bboxes_per_model + ), + _chop_end_of_array( + list_confidence_per_model, list_non_nan_bboxes_per_model + ), + _chop_end_of_array( + list_label_per_model, list_non_nan_bboxes_per_model + ), ) def _chop_end_of_array( list_arrays: list[np.ndarray], list_end_lengths: list[int] ) -> list[np.ndarray]: - """Chop end of arrays in list to the desired length along the first dimension.""" + """Chop end of arrays in list to desired length along first dimension.""" return [ arr[:n] for arr, n in zip(list_arrays, list_end_lengths, strict=True) ] @@ -346,7 +347,7 @@ def _postprocess_single_image_detections( def _remove_nan_and_pad_to_max( input_array, mask_non_nan_rows, max_n_detections, fill_value=np.nan ): - """Remove non-nan from input array and pad with nans, all along first dimension.""" + """Remove non-nan from input array and pad, all along first dimension.""" # Initialise array with nans padded_array = np.full( (max_n_detections, *input_array.shape[1:]), diff --git a/ethology/detectors/ensembles/models.py b/ethology/detectors/ensembles/models.py index ad6fe51d..ab1eada4 100644 --- 
a/ethology/detectors/ensembles/models.py +++ b/ethology/detectors/ensembles/models.py @@ -9,7 +9,6 @@ import xarray as xr import yaml from lightning import LightningModule -from torch.nn.parallel import parallel_apply from torchvision.models import detection, get_model, list_models from ethology.detectors.ensembles.utils import ( diff --git a/ethology/detectors/ensembles/utils.py b/ethology/detectors/ensembles/utils.py index ab1a757c..4a686d44 100644 --- a/ethology/detectors/ensembles/utils.py +++ b/ethology/detectors/ensembles/utils.py @@ -27,9 +27,9 @@ def _pad_to_max_first_dimension(list_arrays, fill_value=np.nan): def _centroid_shape_to_corners(position, shape): """Convert centroid and shape arrays to x1y1, x2y2 corner arrays. - - x1y1 is the top left corner (min x-coordinate, min y-coordinate), - x2y2 is the bottom right corner (max x-coordinate, max y-coordinate) + + x1y1 is the top left corner (min x-coordinate, min y-coordinate), + x2y2 is the bottom right corner (max x-coordinate, max y-coordinate) of the bounding box. Space dimension is assumed to be the second dimension. @@ -43,7 +43,7 @@ def _centroid_shape_to_corners(position, shape): def _corners_to_centroid_shape(x1y1, x2y2): """Convert x1y1, x2y2 corner arrays to centroid and shape arrays. - + Space dimension is assumed to be the second dimension. 
""" return ( diff --git a/tests/test_unit/test_detectors_ensembles/test_utils.py b/tests/test_unit/test_detectors_ensembles/test_utils.py index 7835fcbd..fcd1a54a 100644 --- a/tests/test_unit/test_detectors_ensembles/test_utils.py +++ b/tests/test_unit/test_detectors_ensembles/test_utils.py @@ -111,7 +111,9 @@ def test_centroid_shape_to_corners( ) ], ) -def test_corners_to_centroid_shape(x1y1, x2y2, expected_position, expected_shape): +def test_corners_to_centroid_shape( + x1y1, x2y2, expected_position, expected_shape +): position, shape = _corners_to_centroid_shape(x1y1, x2y2) np.testing.assert_array_equal(position, expected_position) np.testing.assert_array_equal(shape, expected_shape) From 5399fef73580b9686d9b36bd71eb37a653d5e196 Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:29:05 +0000 Subject: [PATCH 38/39] Remove example notebook --- examples/ensemble_of_detectors.py | 333 ------------------------------ 1 file changed, 333 deletions(-) delete mode 100644 examples/ensemble_of_detectors.py diff --git a/examples/ensemble_of_detectors.py b/examples/ensemble_of_detectors.py deleted file mode 100644 index 86911633..00000000 --- a/examples/ensemble_of_detectors.py +++ /dev/null @@ -1,333 +0,0 @@ -"""Evaluating ensemble of trained detectors.""" -# %% -# imports - -from pathlib import Path - -import numpy as np -import torch -import torchvision.transforms.v2 as transforms -import xarray as xr -import yaml -from lightning import Trainer -from matplotlib import pyplot as plt -from torch.utils.data import DataLoader -from torchvision.datasets import CocoDetection, wrap_dataset_for_transforms_v2 - -from ethology.detectors.ensembles.fusion import fuse_detections -from ethology.detectors.ensembles.models import EnsembleDetector -from ethology.detectors.evaluate import compute_precision_recall_ds -from ethology.io.annotations import load_bboxes - -# %% -# %matplotlib widget -# 
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% - - -# Helper functions -def create_coco_dataset( - images_dir: str | Path, - annotations_file: str | Path, - composed_transform: transforms.Compose, -) -> CocoDetection: - """Create a COCO dataset for object detection. - - Note: transforms are applied to the full dataset. If the dataset - is later split, all splits will have the same transforms. - """ - dataset_coco = CocoDetection( - root=images_dir, - annFile=annotations_file, - transforms=composed_transform, - ) - - # wrap dataset for transforms v2 - dataset_transformed = wrap_dataset_for_transforms_v2(dataset_coco) - - return dataset_transformed - - -def collate_fn_varying_n_bboxes(batch: tuple) -> tuple: - """Collate function for dataloader with varying number of bounding boxes. - - A custom function is needed for detection - because the number of bounding boxes varies - between images of the same batch. - See https://pytorch.org/vision/main/auto_examples/transforms/plot_transforms_e2e.html#data-loading-and-training-loop - - Parameters - ---------- - batch : tuple - a tuple of 2 tuples, the first one holding all images in the batch, - and the second one holding the corresponding annotations. - - Returns - ------- - tuple - a tuple of length = batch size, made up of (image, annotations) - tuples. 
- - """ - return tuple(zip(*batch, strict=True)) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Input data - -dataset_dir = Path("/home/sminano/swc/project_crabs/data/aug2023-full") -images_dir = dataset_dir / "frames" -annotations_dir = dataset_dir / "annotations" -annotations_file_path = annotations_dir / "VIA_JSON_combined_coco_gen.json" - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Define a dataloader -# Define transforms for inference -inference_transforms = transforms.Compose( - [ - transforms.ToImage(), - transforms.ToDtype(torch.float32, scale=True), - ] -) - -# Create COCO dataset -# TODO: convert from ethology detections dataset to COCO dataset -# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") -dataset_coco = create_coco_dataset( - images_dir=Path(dataset_dir) / "frames", - annotations_file=annotations_file_path, - composed_transform=inference_transforms, -) - -# dataloader -dataloader = DataLoader( - dataset_coco, - batch_size=12, # 12, - shuffle=False, - num_workers=8, # 4 - collate_fn=collate_fn_varying_n_bboxes, - persistent_workers=True, - # pin_memory=True, # <-- Faster CPU->GPU transfer - # because we guarantee a physical address for the data - # in memory, so we can use DMA that directly takes it to - # the GPU - # prefetch_factor=4, # <-- Prefetch more batches - # multiprocessing_context="fork" - # if ref_config["num_workers"] > 0 and torch.backends.mps.is_available() - # else None, # see https://github.com/pytorch/pytorch/issues/87688 -) - -# %% -# TODO: dataloader to ethology detections dataset -gt_bboxes_ds = load_bboxes.from_files( - annotations_file_path, format="COCO", images_dirs=images_dir -) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Define a YAML config file for the ensemble of trained detectors -experiment_ID = "617393114420881798" -ml_runs_experiment_dir = ( - 
Path("/home/sminano/swc/project_crabs/ml-runs") / experiment_ID -) -last_ckpt = Path("checkpoints") / "last.ckpt" - -config = { - "models": { - "model_class": "fasterrcnn_resnet50_fpn_v2", - # imported from torchvision.models.detection - "model_kwargs": { - "num_classes": 2, - "weights": None, # null in YAML becomes None in Python - "weights_backbone": None, - }, - "checkpoints": [ - str( - ml_runs_experiment_dir - / "f348d9d196934073bece1b877cbc4d38" - / last_ckpt - ), # above_0th - str( - ml_runs_experiment_dir - / "879d2f77e2b24adcb06b87d2fede6a04" - / last_ckpt - ), # above_1st - str( - ml_runs_experiment_dir - / "75583ec227e3444ab692b99c64795325" - / last_ckpt - ), # above_5th - str( - ml_runs_experiment_dir - / "4acc37206b1e4f679d535c837bee2c2f" - / last_ckpt - ), # above_10th - str( - ml_runs_experiment_dir - / "fdcf88fcbcc84fbeb94b45ca6b6f8914" - / last_ckpt - ), # above_25th - str( - ml_runs_experiment_dir - / "daa05ded0ea047388c9134bf044061c5" - / last_ckpt - ), # above_50th - ], - }, - "fusion": { - "method": "weighted_boxes_fusion", - # "nms", "soft_nms", "weighted_boxes_fusion" or "non_maximum_weighted" - "method_kwargs": { - # arguments as in ensemble_boxes.weighted_boxes_fusion - "iou_thr": 0.5, # iou threshold for the ensemble - "skip_box_thr": 0.0001, - }, - "n_jobs": -1, # workers for joblib.Parallel, - # n_workers should be <= number of CPU cores - # follows joblib n_jobs - # if -1: all are used - # if None: same as 1 - # "confidence_threshold_post_fusion": 0.0, - "max_n_detections": 300, - }, -} -config_file = "ensemble_of_detectors.yaml" -with open(config_file, "w") as f: - yaml.dump(config, f, sort_keys=False) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Load the ensemble of detectors -ensemble_detector = EnsembleDetector(config_file) -print(f"Ensemble detector is on device: {ensemble_detector.device}") - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Run the 
ensemble of detectors on a dataset -# Use Trainer for inference (this sets the device flexibly) - -# With multiple devices: -# Lightning handles the "main" device (so still device=1), -# while code internally distributes models across GPUs using parallel_apply. -trainer = Trainer( - accelerator="gpu", - devices=1, - logger=False, - precision="16-mixed", # --- results change - # strategy = 'ddp' ? -) -predictions = trainer.predict(ensemble_detector, dataloader) - - -# %% -# Format predictions as ethology detections dataset and add attrs -# TODO: think about syntax of format_predictions (should it be instance or -# static method instead?) -ensemble_detections_ds = ensemble_detector.format_predictions( - predictions=predictions, - attrs=gt_bboxes_ds.attrs -) - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Fuse detections across models using selected method -image_width_height = np.array(dataloader.dataset[0][0].shape[-2:])[::-1] -ensemble_detections_ds.attrs["image_shape"] = image_width_height -config_fusion: dict = config["fusion"] - - -fused_detections_ds = fuse_detections( - ensemble_detections_ds, - fusion_method=config_fusion["method"], - fusion_method_kwargs=config_fusion["method_kwargs"], - # n_workers=config_fusion.get("n_jobs", 1), - # max_n_detections=config_fusion["max_n_detections"], - # should be larger than expected maximum number of detections after fusion - # ---- method kwargs ---- -) - -# %% - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Fuse detections across models with NMS - -# fused_detections_nms_ds = fuse_ensemble_detections( -# ensemble_detections_ds, -# fusion_method="soft_nms", -# fusion_method_kwargs={ -# "iou_thr": config_fusion["method_kwargs"]["iou_thr"], -# "sigma": 0.5, -# "thresh": 0.001, -# }, -# max_n_detections=500, -# ) - -# fused_detections_ds = fused_detections_nms_ds -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Remove low confidence detections 
-confidence_threshold_post_fusion = 0.4 -fused_detections_ds_ = fused_detections_ds.where( - fused_detections_ds.confidence >= confidence_threshold_post_fusion -) - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Evaluate the ensemble model -# - load ground truth -# - compute metrics - -# gt_bboxes_ds = load_bboxes.from_files(annotations_file_path, format="COCO") - -iou_threshold_tp = 0.25 -fused_detections_ds_, gt_bboxes_ds = compute_precision_recall_ds( - pred_bboxes_ds=fused_detections_ds_, - gt_bboxes_ds=gt_bboxes_ds, - iou_threshold=iou_threshold_tp, -) - -# All models on full August dataset, without removing low -# confidence detections: -# confidence_threshold_post_fusion = 0.0 -# Precision: 0.5920 -# Recall: 0.8455 -# --- -# confidence_threshold_post_fusion = 0.4 -# Precision: 0.8339 -# Recall: 0.7177 -# --- -# confidence_threshold_post_fusion = 0.5 -# Precision: 0.8714 -# Recall: 0.6624 -# --- -# confidence threshold post fusion: 0.40 AND mixed precision in trainer -# Precision: 0.8336 -# Recall: 0.7162 - -print( - "Ensemble model with confidence threshold post fusion: " - f"{confidence_threshold_post_fusion:.2f}" -) -print(f"Precision: {fused_detections_ds_.precision.mean().values:.4f}") -print(f"Recall: {fused_detections_ds_.recall.mean().values:.4f}") - - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% -# Evaluate single models -list_detections_ds_eval = [] -for k in range(ensemble_detections_ds.sizes["model"]): - # filter low confidence detections (for a fairer comparison) - detections_one_model = ensemble_detections_ds.where( - ensemble_detections_ds.confidence >= confidence_threshold_post_fusion - ).sel(model=k) - - # evaluate - detections_ds, _ = compute_precision_recall_ds( - pred_bboxes_ds=detections_one_model, - gt_bboxes_ds=gt_bboxes_ds, - iou_threshold=iou_threshold_tp, - ) - list_detections_ds_eval.append(detections_ds) - - print(f"Model: {k}") - print(f"Precision: 
{detections_ds.precision.mean().values:.4f}") - print(f"Recall: {detections_ds.recall.mean().values:.4f}") - print("--------------------------------") - -# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% From b34680d09abc2d56f8a421e41562ec0589136f1d Mon Sep 17 00:00:00 2001 From: sfmig <33267254+sfmig@users.noreply.github.com> Date: Thu, 4 Dec 2025 18:43:32 +0000 Subject: [PATCH 39/39] Docs fixes --- docs/requirements.txt | 1 + docs/source/conf.py | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index cb55a343..b270cde9 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -9,4 +9,5 @@ sphinx-autodoc-typehints sphinx-design sphinx-gallery sphinx-notfound-page +sphinx-paramlinks sphinx-sitemap diff --git a/docs/source/conf.py b/docs/source/conf.py index 6db7d86d..d906807e 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -37,6 +37,7 @@ "sphinx.ext.autosummary", "sphinx.ext.viewcode", "sphinx.ext.intersphinx", + "sphinx.ext.doctest", # for lightning docstrings "myst_parser", "nbsphinx", "notfound.extension", @@ -44,6 +45,7 @@ "sphinx_gallery.gen_gallery", "sphinx_sitemap", "sphinx.ext.autosectionlabel", + "sphinx_paramlinks", ] # Configure the myst parser to enable cool markdown features @@ -186,6 +188,8 @@ "https://python-jsonschema.readthedocs.io/en/stable/", None, ), + "torch": ("https://pytorch.org/docs/stable/", None), + "pytorch_lightning": ("https://lightning.ai/docs/pytorch/stable/", None), }