Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -86,3 +86,6 @@ uv.lock

# written by setuptools_scm
**/_version.py

# uv
uv.lock
38 changes: 20 additions & 18 deletions ethology/io/annotations/save_bboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,7 @@ def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
"""Get preliminary dataframe from a dataset of bounding boxes annotations.

If the dataset has an "image_shape" array, the returned dataframe
will have "image_shape_x" and "image_shape_y" columns. The returned
dataframe will have a "category" column, filled with the relevant category
values, or filled with -1 if no category array was present in the
original dataset.
will have "image_shape_x" and "image_shape_y" columns.

The returned dataframe is not COCO-exportable.

Expand All @@ -128,15 +125,15 @@ def _get_raw_df_from_ds(ds: xr.Dataset) -> pd.DataFrame:
# (where at least one of the specified columns contains a NaN value.)
df_raw = df_raw.dropna(subset=["position", "shape"])

# Add "category" column if not present
if "category" not in df_raw.columns:
df_raw["category"] = -1

# Pivot the dataframe to get position_x, position_y, shape_x, shape_y, etc.
index_cols = ["image_id", "id", "category"]
pivot_values = ["position", "shape"]
if "image_shape" in df_raw.columns:
pivot_values.append("image_shape")
# pivot_values: variables with x and y values
# index_cols: variables **without** x and y values
pivot_values = [
c for c in ["position", "shape", "image_shape"] if c in df_raw.columns
]
index_cols = [
c for c in df_raw.columns if c not in {*pivot_values, "space"}
]

df_raw = df_raw.pivot_table(
index=index_cols,
Expand Down Expand Up @@ -238,17 +235,22 @@ def _add_COCO_data_to_df(
]
)

# Rename "category" to "category_id" (in dataset it is an integer)
# and compute "category" as string from "category_id"
map_category_to_str = ds_attrs["map_category_to_str"]
# Rename "category" to "category_id"
# (in input dataset "category" is an integer, but in COCO it is a str)
df.rename(columns={"category": "category_id"}, inplace=True)
df["category"] = df["category_id"].map(map_category_to_str)
# and compute "category" as a string from "category_id"
map_category_to_str = ds_attrs["map_category_to_str"]
df["category"] = df["category_id"].map(
lambda x: map_category_to_str.get(x, "")
) # set value to "" if category ID is not defined in map_category_to_str

# supercategory
# Set supercategory to empty string if not defined
if "supercategory" not in df.columns:
df["supercategory"] = ""
else:
df["supercategory"] = df["supercategory"].astype(str)

# other
# Set iscrowd always to 0
df["iscrowd"] = 0

# Set index name and add "annotation_id" as column
Expand Down
7 changes: 5 additions & 2 deletions ethology/validators/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,9 +219,10 @@ class ValidBboxAnnotationsDataset(ValidDataset):

- is an xarray Dataset,
- has ``image_id``, ``space``, ``id`` as dimensions,
- has ``position`` and ``shape`` as data variables,
- both data variables span at least the dimensions ``image_id``,
- has ``position``, ``shape`` and ``category`` as data variables,
- ``position`` and ``shape`` span at least the dimensions ``image_id``,
``space`` and ``id``,
- ``category`` spans at least the dimensions ``image_id`` and ``id``.


Attributes
Expand All @@ -237,6 +238,7 @@ class ValidBboxAnnotationsDataset(ValidDataset):

- ``position`` maps to ``image_id``, ``space`` and ``id``,
- ``shape`` maps to ``image_id``, ``space`` and ``id``,
- ``category`` maps to ``image_id`` and ``id``.

Raises
------
Expand All @@ -259,6 +261,7 @@ class ValidBboxAnnotationsDataset(ValidDataset):
required_data_vars: ClassVar[dict[str, set]] = {
"position": {"image_id", "space", "id"},
"shape": {"image_id", "space", "id"},
"category": {"image_id", "id"},
}


Expand Down
8 changes: 6 additions & 2 deletions ethology/validators/detections.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,12 @@ class ValidBboxDetectionsDataset(ValidDataset):

- is an xarray Dataset,
- has ``image_id``, ``space``, ``id`` as dimensions,
- has ``position``, ``shape`` and ``confidence`` as data variables,
- has ``position``, ``shape``, ``category`` and ``confidence`` as data
variables,
- ``position`` and ``shape`` span at least the dimensions ``image_id``,
``space`` and ``id``,
- ``confidence`` spans at least the dimensions ``image_id`` and ``id``.
- ``category`` and ``confidence`` span at least the dimensions
``image_id`` and ``id``.


Attributes
Expand All @@ -34,6 +36,7 @@ class ValidBboxDetectionsDataset(ValidDataset):

- ``position`` maps to ``image_id``, ``space`` and ``id``,
- ``shape`` maps to ``image_id``, ``space`` and ``id``,
- ``category`` maps to ``image_id`` and ``id``,
- ``confidence`` maps to ``image_id`` and ``id``.

Raises
Expand All @@ -57,5 +60,6 @@ class ValidBboxDetectionsDataset(ValidDataset):
required_data_vars: ClassVar[dict[str, set]] = {
"position": {"image_id", "space", "id"},
"shape": {"image_id", "space", "id"},
"category": {"image_id", "id"},
"confidence": {"image_id", "id"},
}
2 changes: 2 additions & 0 deletions tests/fixtures/annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,12 +130,14 @@ def valid_bbox_annotations_dataset():
(len(image_ids), len(space_dims), len(annotation_ids))
)
shape_data = np.copy(position_data)
category_data = np.ones((len(image_ids), len(annotation_ids)))

# Create the dataset
ds = xr.Dataset(
data_vars={
"position": (["image_id", "space", "id"], position_data),
"shape": (["image_id", "space", "id"], shape_data),
"category": (["image_id", "id"], category_data),
},
coords={
"image_id": image_ids,
Expand Down
70 changes: 58 additions & 12 deletions tests/test_unit/test_io_annotations/test_save_bboxes.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import pandas as pd
import pandera.pandas as pa
import pytest
import xarray as xr

from ethology.io.annotations.load_bboxes import from_files
from ethology.io.annotations.save_bboxes import (
Expand Down Expand Up @@ -238,30 +239,25 @@ def test_validate_bboxes_df_COCO(
def test_get_raw_df_from_ds(
annotations_test_data: dict, input_file: str, drop_variables: bool
):
"""Test the function that gets the raw dataframe derived from the xarray
dataset fills in the appropriate category values, and includes the image
shape columns if present in the original dataset.
"""Test that the function that computes the raw dataframe from the xarray
dataset includes the image shape columns, if they are present in the
original dataset.
"""
# Read input dataset
input_file = annotations_test_data[input_file]
format: Literal["VIA", "COCO"] = (
"VIA" if "VIA" in str(input_file) else "COCO"
)
ds = from_files(input_file, format=format)

# Drop data arrays if specified
# Drop "image_shape" data array if required
if drop_variables:
vars_to_drop = [
var
for var in ["category", "image_shape"]
if var in list(ds.data_vars.keys())
]
ds = ds.drop_vars(vars_to_drop) # type: ignore
ds = ds.drop_vars("image_shape") # type: ignore

# Get raw dataframe
df_raw = _get_raw_df_from_ds(ds)

# The "category" column should always be present in the raw dataframe,
# even if the category array was not present in the original dataset
# List of expected columns
list_expected_columns = [
"image_id",
"id",
Expand Down Expand Up @@ -350,6 +346,56 @@ def test_add_COCO_data_to_df(annotations_test_data: dict):
assert all(df_output["iscrowd"] == 0)


def test_add_COCO_data_to_df_empty_category(annotations_test_data):
    """Check that unmapped category IDs produce an empty category name.

    When the ``map_category_to_str`` attribute has no entry for a category
    ID, the "category" column of the output dataframe is expected to hold
    an empty string.
    """
    # Load the sample COCO file as a bounding boxes dataset
    ds = from_files(
        annotations_test_data["small_bboxes_COCO.json"], format="COCO"
    )

    # Swap the ID-to-name map for one whose only key is confirmed to be
    # absent from the original map
    unused_category_id = 999
    assert unused_category_id not in ds.map_category_to_str
    ds.attrs["map_category_to_str"] = {unused_category_id: "foo"}

    # Build the raw dataframe, then add the COCO-specific columns
    df_output = _add_COCO_data_to_df(_get_raw_df_from_ds(ds), ds.attrs)

    # Every category name should be an empty string
    assert (df_output["category"] == "").all()


@pytest.mark.parametrize("supercategory_value", [999, "foo", True])
def test_add_COCO_data_to_df_empty_supercategory(
    annotations_test_data, supercategory_value
):
    """Check that a supercategory data variable is exported as strings."""
    # Load the sample COCO file as a bbox annotations dataset
    ds = from_files(
        annotations_test_data["small_bboxes_COCO.json"], format="COCO"
    )

    # Add a "supercategory" array modelled on "category", holding the
    # parametrised value in every cell
    ds["supercategory"] = xr.full_like(
        ds.category, fill_value=supercategory_value, dtype=object
    )

    # Build the raw dataframe, then add the COCO-specific columns
    df_output = _add_COCO_data_to_df(_get_raw_df_from_ds(ds), ds.attrs)

    # All supercategory entries must be str instances equal to the
    # string form of the parametrised value
    supercategory_col = df_output["supercategory"]
    expected = str(supercategory_value)
    assert all(isinstance(entry, str) for entry in supercategory_col)
    assert (supercategory_col == expected).all()


def test_create_COCO_dict(sample_bboxes_df: Callable):
"""Test the function that transforms the modified bboxes dataframe to
a COCO dictionary.
Expand Down
23 changes: 22 additions & 1 deletion tests/test_unit/test_validators/test_annotations.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,10 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
["image_id", "space", "id", "foo"],
np.zeros((3, 2, 2, 1)),
),
"category": (
["image_id", "id", "foo"],
np.ones((3, 2, 1)),
),
},
),
does_not_raise(),
Expand All @@ -260,6 +264,10 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
["image_id", "space", "id"],
np.zeros((3, 2, 2)),
),
"category": (
["image_id", "id"],
np.ones((3, 2)),
),
},
),
pytest.raises(ValueError),
Expand All @@ -280,14 +288,18 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
},
),
pytest.raises(ValueError),
"Missing required data variables: ['position', 'shape']",
(
"Missing required data variables: "
"['category', 'position', 'shape']"
),
),
(
xr.Dataset(
coords={"image_id": np.arange(3), "id": np.arange(2)},
data_vars={
"position": (["image_id", "id"], np.zeros((3, 2))),
"shape": (["image_id", "id"], np.zeros((3, 2))),
"category": (["image_id", "id"], np.ones((3, 2))),
},
),
pytest.raises(ValueError),
Expand All @@ -309,6 +321,10 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
["foo", "bar", "id"],
np.zeros((3, 2, 2)),
),
"category": (
["foo", "id"],
np.ones((3, 2)),
),
},
),
pytest.raises(ValueError),
Expand All @@ -330,6 +346,10 @@ def test_COCO_non_unique_image_IDs(annotations_test_data: dict):
["image_id", "id"],
np.zeros((3, 2)),
),
"category": (
["image_id", "id"],
np.ones((3, 2)),
),
},
),
pytest.raises(ValueError),
Expand Down Expand Up @@ -377,4 +397,5 @@ def test_validator_bbox_annotations_dataset(
assert validator.required_data_vars == {
"position": {"id", "image_id", "space"},
"shape": {"id", "image_id", "space"},
"category": {"id", "image_id"},
}
Loading