From bf9b484056c59f9adc2ff219b08fdd93aa54494c Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 25 Apr 2025 10:12:49 +0100
Subject: [PATCH 01/12] Exploring cotracker online model options

---
 MANIFEST.in                  |   3 +
 notebook_cotracker_online.py | 307 +++++++++++++++++++++++++++++++++++
 pyproject.toml               |   4 +
 3 files changed, 314 insertions(+)
 create mode 100644 notebook_cotracker_online.py

diff --git a/MANIFEST.in b/MANIFEST.in
index d5fb477d..d45a5b91 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -13,3 +13,6 @@ recursive-include docs *.md *.rst *.py
 # Include json schemas
 recursive-include ethology/annotations/json_schemas/schemas *.json
 recursive-include ethology/annotations/json_schemas/schemas *.md
+
+# Temporarily include notebooks
+include notebook_cotracker_online.py
diff --git a/notebook_cotracker_online.py b/notebook_cotracker_online.py
new file mode 100644
index 00000000..f9378f84
--- /dev/null
+++ b/notebook_cotracker_online.py
@@ -0,0 +1,307 @@
+"""Multi-window approach for online tracking with CoTracker3.
+
+Todo:
+- more query points?
+- longer window better?
+- overlapping window better?
+
+"""
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+import os
+from datetime import datetime
+from pathlib import Path
+
+import imageio.v3 as iio
+import numpy as np
+import torch
+from movement.io import load_bboxes, load_poses, save_poses
+
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+DEFAULT_DEVICE = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "mps"
+    if torch.backends.mps.is_available()
+    else "cpu"
+)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Input video
+video_path = Path(
+    "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test.mp4"
+)
+
+window_half_length = 40  # in frames, overlapping,
+step_between_query_frames = 100
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Query points
+
+ground_truth_data = Path(
+    "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test_corrected_ST_SM_20241029_113207.csv"
+)
+
+ds_gt = load_bboxes.from_file(
+    file_path=ground_truth_data,
+    source_software="VIA-tracks",
+    use_frame_numbers_from_file=False,
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Select query points
+# ------------------
+# Select one individual only
+ds_gt_one = ds_gt.isel(individuals=[13 - 1])  # [3-1,13-1,35-1,57-1])
+
+# Select all individuals
+# ds_gt_one = ds_gt
+
+print(ds_gt_one)
+
+# Select frames
+list_frames = list(range(ds_gt_one.sizes["time"]))
+frames_to_select = np.array(list_frames)[
+    ::step_between_query_frames
+]  # every N frame
+print(frames_to_select)
+# --------------------
+
+# Prepare query points array
+# it has frame as first column
+queries_array = np.vstack(
+    [
+        np.hstack(
+            [
+                f
+                * np.ones((ds_gt_one.sizes["individuals"], 1)),  # frame column
+                ds_gt_one.position.sel(time=f).values.T,  # x, y columns
+            ]
+        )
+        for f in range(ds_gt_one.sizes["time"])
+    ]
+)
+
+# Remove rows with nans in position
+queries_array = queries_array[~np.any(np.isnan(queries_array), axis=1), :]
+
+# Selected queries
+queries_sel = queries_array[
+    [col in frames_to_select for col in queries_array[:, 0]], :
+]
+print(np.unique(queries_sel[:, 0]))
+
+# %%
+# Convert to torch tensor
+queries = torch.tensor(queries_sel)
+queries = queries.to(torch.float).to(DEFAULT_DEVICE)
+
+
+# %%%%%%%%%%%%%%%%%%%
+# Load online model
+
+model = torch.hub.load("facebookresearch/co-tracker", "cotracker3_online")
+
+# Set window length
+model.model.window_len = window_half_length * 2  # in frames
+
+# Set step
+model.step = window_half_length
+print(model.step)  # window is of width model.step * 2
+
+# Move to GPU
+model = model.to(DEFAULT_DEVICE)
+
+# model = CoTrackerOnlinePredictor(
+#     # checkpoint=None,
+#     checkpoint=(
+#       "/home/sminano/swc/project_ethology/tap_models_crabs/"
+#       "scaled_online.pth"
+#     ),
+#     window_len=2 * window_half_length,  # in frames
+#     v2=False,
+# )
+
+
+# %%%%%%%%%%%%%%%
+# Process chunk function
+
+
+def _process_step(window_frames, is_first_step, queries):
+    """Run the model on the last ``2 * model.step`` buffered frames."""
+    video_chunk = (
+        torch.tensor(
+            np.stack(window_frames[-model.step * 2 :]), device=DEFAULT_DEVICE
+        )
+        .float()
+        .permute(0, 3, 1, 2)[None]
+    )  # (1, T, 3, H, W)
+
+    # Add a batch axis to the queries and pass the chunk through the model
+    return model(
+        video_chunk,
+        is_first_step=is_first_step,
+        queries=queries[None],
+    )
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Process video in overlapping windows (2 * model.step frames, every step)
+window_frames: list[np.ndarray] = []
+
+# Iterating over video frames, processing one window at a time:
+is_first_step = True
+video_iterator = iio.imiter(str(video_path), plugin="FFMPEG")
+for i, frame in enumerate(video_iterator):
+    # Every model.step frames, run the model on the buffered window
+    if i % model.step == 0 and i != 0:
+        pred_tracks, pred_visibility = _process_step(
+            window_frames,
+            is_first_step,
+            queries=queries,
+        )
+        is_first_step = False
+
+    window_frames.append(frame)
+    del window_frames[: -2 * model.step]  # drop frames no window needs
+
+
+# Processing the final video frames
+# (in case video length is not a multiple of model.step)
+pred_tracks, pred_visibility = _process_step(
+    window_frames[-(i % model.step) - model.step - 1 :],
+    is_first_step,
+    queries=queries,
+    # grid_query_frame=i
+)
+
+print("Tracks are computed")
+
+# %%
+print(pred_tracks)
+print(pred_tracks.shape)  # (1, T, N, 2)
+print(
+    "pred_tracks in MB: "
+    f"{(pred_tracks.element_size() * pred_tracks.nelement()) / 1e6}"
+)
+
+print(pred_visibility.shape)  # (1, T, N, 1)
+print(
+    "pred_visibility in MB: "
+    f"{(pred_visibility.element_size() * pred_visibility.nelement()) / 1e6}"
+)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Save as a movement dataset
+# (n_frames, n_space, n_keypoints, n_individuals)
+
+# assuming 1 query per individual in frame 0
+position_array = (
+    pred_tracks.permute(1, -1, 0, -2).cpu().numpy()
+)  # (T, 2, 1, Nqueries)
+visibility_array = pred_visibility.cpu().numpy()[0]  # (T, Nqueries)
+
+# set to nan if non visible
+# (improve this)
+for i in range(visibility_array.shape[1]):
+    position_array[~visibility_array[:, i], :, :, i] = np.nan
+
+# -----------------------------
+# # get each track from its query point
+# position_array_fix = np.vstack(
+#     [
+#         position_array[
+#             frames_to_select[i]:(frames_to_select[i+1]
+#               if i<queries.shape[0]-1 else None), :, i
+#         ]
+#         for i in range(queries.shape[0])
+#     ]
+# )
+# position_array_fix = position_array_fix.T[None,None].T
+# --------------------------------------------
+
+ds = load_poses.from_numpy(
+    position_array=position_array,  # position_array_fix,
+    individual_names=[f"ind_{i}" for i in range(position_array.shape[-1])],
+    keypoint_names=["centroid"],
+    source_software="CoTracker3",
+)
+
+
+# Export to read in napari
+ds.attrs["source_file"] = ""
+
+# get string timestamp of  today in yyyymmdd_hhmmss
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+save_poses.to_sleap_analysis_file(
+    ds,
+    f"../tap_models_crabs/output/cotracker_output_{timestamp}.h5",
+)
+
+
+# %%
+print(f"Allocated: {torch.cuda.memory_allocated() / 1024**2:.2f} MB")
+print(f"Reserved:  {torch.cuda.memory_reserved() / 1024**2:.2f} MB")
+
+
+# %%
+def model_gpu_mem_MB(model):
+    """Return the size in MB of the model tensors stored on a CUDA device."""
+    n_bytes = 0
+    # Parameters first, then buffers; tensors not on CUDA are not counted
+    for tensors in (model.parameters(), model.buffers()):
+        n_bytes += sum(
+            t.element_size() * t.nelement() for t in tensors if t.is_cuda
+        )
+    # Convert bytes to MB
+    return n_bytes / 1024 / 1024
+
+
+print(f"Model uses approximately {model_gpu_mem_MB(model):.2f} MB on GPU")
+
+# %%
+# Remove model from GPU?
+# del model
+
+# %%
+
+# Save a video with predicted tracks
+
+# vis = Visualizer(save_dir="output", pad_value=120, linewidth=3)
+# vis.visualize(video, pred_tracks, pred_visibility,
+# query_frame=grid_query_frame)
+
+
+# # Generate timestamp of today in format YYYYMMDD_HHMMSS
+# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+# # Place video on gpu
+# video = read_video_from_path(video_path)
+# video = torch.from_numpy(video).permute(0, 3, 1, 2)[None]
+# print((video.element_size() * video.nelement()) / 1e9)  # in GB
+
+# video = video.to(DEFAULT_DEVICE)  # (1, T, 3, H, W) ---> OOM
+
+
+# # %%
+# vis = Visualizer(
+#     save_dir="./output",
+#     linewidth=1,
+#     mode="cool",
+#     tracks_leave_trace=-1,
+#     fps=10,
+# )
+
+# vis.visualize(
+#     video,
+#     pred_tracks,  # .to('cpu'),
+#     pred_visibility,
+#     query_frame=grid_query_frame,
+#     filename=f"queries_{timestamp}",
+# )
+
+# %%
diff --git a/pyproject.toml b/pyproject.toml
index 3fb07d7b..2648d341 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,6 +20,10 @@ classifiers = [
 ]
 dependencies = [
   "movement",
+  "torch",
+  "torchvision",
+  "cotracker @ git+https://github.com/facebookresearch/co-tracker.git",
+  "imageio[ffmpeg]",
 ]
 
 [project.urls]

From 8be868ea5c79730ce73cdad80f5196b83d8834b3 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 25 Apr 2025 10:40:55 +0100
Subject: [PATCH 02/12] offline WIP

---
 notebook_cotracker_offline.py | 263 ++++++++++++++++++++++++++++++++++
 1 file changed, 263 insertions(+)
 create mode 100644 notebook_cotracker_offline.py

diff --git a/notebook_cotracker_offline.py b/notebook_cotracker_offline.py
new file mode 100644
index 00000000..7f5d9c82
--- /dev/null
+++ b/notebook_cotracker_offline.py
@@ -0,0 +1,263 @@
+# %%
+# Imports
+# import sleap_io as sio
+import os
+from datetime import datetime
+from pathlib import Path
+
+import matplotlib.pyplot as plt
+import numpy as np
+import torch
+import torch.nn.functional as F
+from cotracker.utils.visualizer import read_video_from_path
+from movement.io import load_bboxes, load_poses, save_poses
+
+os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"
+
+
+DEFAULT_DEVICE = (
+    "cuda"
+    if torch.cuda.is_available()
+    else "mps"
+    if torch.backends.mps.is_available()
+    else "cpu"
+)
+
+# %matplotlib widget
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Data paths
+video_path = "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test.mp4"
+
+ground_truth_data = Path(
+    "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test_corrected_ST_SM_20241029_113207.csv"
+)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Query points from gt
+
+ds_gt = load_bboxes.from_file(
+    file_path=ground_truth_data,
+    source_software="VIA-tracks",
+    use_frame_numbers_from_file=False,
+)
+
+# Prepare query points array
+# it has frame as first column
+queries_array = np.vstack(
+    [
+        np.hstack(
+            [
+                f * np.ones((ds_gt.sizes["individuals"], 1)),  # frame column
+                ds_gt.position.sel(time=f).values.T,  # x, y columns
+            ]
+        )
+        for f in range(ds_gt.sizes["time"])
+    ]
+)
+
+# Remove rows with nans in position
+queries_array = queries_array[~np.any(np.isnan(queries_array), axis=1), :]
+
+# # Select frames
+# list_frames = list(range(ds_gt.sizes["time"]))
+# frames_to_select = np.array(list_frames)[
+#     ::step_between_query_frames
+# ]  # every second frame
+# queries_sel = queries_array[[col in frames_to_select for col in queries_array[:, 0]], :]
+
+# print(np.unique(queries_sel[:, 0]))
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Read video
+# TODO: is it faster with sleap_io?
+video_full = read_video_from_path(video_path)
+print(type(video_full))
+print(video_full.shape)  # (614, 2160, 4096, 3)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Make it a torch tensor with dimensions in expected order
+video_full = torch.from_numpy(video_full).permute(0, 3, 1, 2)[None]
+
+print(video_full.shape)  # (1, 614, 3, 2160, 4096)
+print(video_full.device)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Downsample frames
+# out_frame_size = [216, 410] # 108, 205
+# video = F.interpolate(video[0], out_frame_size, mode="bilinear")[None]
+
+print(video_full.shape)  # (1, 614, 3, 2160, 4096)
+print(video_full.device)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Select first part of the video only  (to fit in GPU)
+# video = video[:, : video.shape[1] // 8]
+chunk_start = 0
+video = video_full[:, chunk_start : chunk_start + 75, :, :, :]  # 75 frames
+print(video.shape)  # (1, 307, 3, 2160, 4096)
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Convert to float and place video on device
+# Why do we need .float conversion?
+# chatgpt: Mathematical operations like convolutions, normalizations, or matrix mults expect float32 or float16
+
+
+device = "cuda"
+# video = video.float().to(device)
+# video = video.half().to(device) # Use half precision for memory efficiency
+# TODO: Make sure your video is normalized properly (video / 255.0) before converting to half()
+video = video.to(torch.float).to(device)  # torch.float16
+
+# %%
+# Check gpu memory usage
+# print(torch.cuda.memory_summary())
+# %%
+# Define query points
+queries = torch.tensor(
+    [
+        [0.0, 1070.1, 1697.1],
+        # if downsampled: [0.0, 97.09, 177.34],  # point tracked from the first frame
+        [0.0, 980.7, 1762.2],
+        # if downsampled: [0.0, 106.20, 170.33],
+        # [113.0, 1961.00, 1665.00]
+        # [10.0, 600.0, 500.0],  # frame number 10
+        # [20.0, 750.0, 600.0],  # ...
+        # [30.0, 900.0, 200.0],
+    ]
+)
+
+# # Select all points at the first frame of the chunk
+# queries = queries_array[queries_array[:, 0] == chunk_start, :]  
+# queries = queries[:1, :]
+# queries = torch.tensor(queries)
+
+# Place query tensor on GPU
+queries = queries.to(torch.float).to(device)  # .half().to(device) torch.float16
+
+# %%
+# Visualize query points over frame
+
+# Create a list of frame numbers corresponding to each point
+frame_numbers = queries[:, 0].int().unique().tolist()
+
+for frame_number in frame_numbers:
+    # get the query points for the current frame
+    queries_one_frame = queries[queries[:, 0] == frame_number]
+
+    fig, ax = plt.subplots(1, 1)
+    # plot frame
+    ax.imshow(
+        video_full[frame_number, :, :]
+    )  # B T C H W -> H W C
+    # plot query points
+    ax.scatter(
+        x=queries_one_frame[:, 1].cpu(), y=queries_one_frame[:, 2].cpu(), s=5, c="red"
+    )
+
+    ax.set_title("Frame {}".format(frame_number))
+    ax.set_xlim(0, video.shape[4])
+    ax.set_ylim(0, video.shape[3])
+    ax.invert_yaxis()
+
+
+# %%
+# Get Offline CoTracker model
+model = torch.hub.load("facebookresearch/co-tracker", "cotracker3_offline")
+
+# Use the model in half precision and move it to the GPU
+# Note: this is for memory usage
+model = model.to(device)  # .half().to(device) # .to(torch.float16).to(device)
+
+
+# %%
+# all_half = all(p.dtype == torch.float16 for p in model.parameters())
+# print("All parameters are float16:", all_half)
+
+# for name, param in model.named_parameters():
+#     # print(f"{name}: {param.dtype}")
+#     if param.dtype == torch.float32:
+#         param.data = param.data.to(torch.float16)
+#         print("PATATA")
+
+# for name, buffer in model.named_buffers():
+#     print(f"{name}: {buffer.dtype}")
+
+
+# %%
+# Run CoTracker
+pred_tracks, pred_visibility = model(
+    video, queries=queries[None], backward_tracking=True
+)  # B T N 2,  B T N 1
+
+
+# from torch.cuda.amp import autocast
+# model.eval()
+# with torch.no_grad(), torch.autocast(device_type="cuda"):
+#     pred_tracks, pred_visibility = model(
+#         video, queries=queries[None], #backward_tracking=True
+#     )  # B T N 2,  B T N 1
+
+# %%
+# TODO: Can I upsample the results to the original video res?
+print(pred_tracks.shape)  # (1, 307, 2, 2) --> Batch, Time, N of points, 2 (x,y)
+print(pred_visibility.shape)
+
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Save as a movement dataset
+
+# Assuming 1 query per individual
+
+# Prepare position array
+# (n_frames, n_space, n_keypoints, n_individuals)
+position_array = np.empty(
+    ds_gt.position.shape[:2] + (1,) + (ds_gt.position.shape[-1],)
+    # add keypoint dimension
+)
+position_array.fill(np.nan)
+position_array[150 : 150 + 75, :, :, :] = (
+    pred_tracks.permute(1, -1, 0, -2).cpu().numpy()
+)  
+
+ds = load_poses.from_numpy(
+    position_array=position_array,
+    individual_names=[f"ind_{i}" for i in range(pred_tracks.shape[2])],
+    keypoint_names=["centroid"],
+    source_software="CoTracker3",
+)
+
+
+# Export to read in napari
+ds.attrs["source_file"] = ""
+
+# get string timestamp of  today in yyyymmdd_hhmmss
+timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+save_poses.to_sleap_analysis_file(
+    ds,
+    f"output/cotracker_offline_output_{timestamp}.h5",
+)
+
+
+# %%
+# # Visualize results
+
+# vis = Visualizer(
+#     save_dir="./output",
+#     linewidth=1,
+#     mode="cool",
+#     tracks_leave_trace=-1,
+#     fps=10,
+# )
+
+# # Generate timestamp of today in format YYYYMMDD_HHMMSS
+# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+
+# # Save video with predictions
+# vis.visualize(video, pred_tracks, pred_visibility, filename=f"queries_{timestamp}")
+
+# %%

From a201812e58bbc3d7bf78c5e0c42e58ce3840456d Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 25 Apr 2025 11:56:10 +0100
Subject: [PATCH 03/12] Trying offline querying one individual multiple frames

---
 notebook_cotracker_offline.py | 237 +++++++++++++++++++++-------------
 1 file changed, 145 insertions(+), 92 deletions(-)

diff --git a/notebook_cotracker_offline.py b/notebook_cotracker_offline.py
index 7f5d9c82..c18e43f9 100644
--- a/notebook_cotracker_offline.py
+++ b/notebook_cotracker_offline.py
@@ -1,6 +1,5 @@
 # %%
 # Imports
-# import sleap_io as sio
 import os
 from datetime import datetime
 from pathlib import Path
@@ -23,7 +22,7 @@
     else "cpu"
 )
 
-# %matplotlib widget
+%matplotlib widget
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Data paths
@@ -33,9 +32,23 @@
     "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test_corrected_ST_SM_20241029_113207.csv"
 )
 
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Parmeters
+
+# query points
+step_between_query_frames = 5
+individuals_gt_ids = [57]
+
+# downsample video
+scale_factor = 0.25
+
+# clip video
+chunk_start = 0
+chunk_width = 75
+
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Query points from gt
+# Select query points
 
 ds_gt = load_bboxes.from_file(
     file_path=ground_truth_data,
@@ -43,62 +56,94 @@
     use_frame_numbers_from_file=False,
 )
 
+
+# ------------------
+# Select individuals to use as query points
+if len(individuals_gt_ids) == 0:
+    ds_gt_one = ds_gt
+else:
+    ds_gt_one = ds_gt.isel(individuals=[i - 1 for i in individuals_gt_ids])
+
+print(ds_gt_one)
+
+# Select frames
+list_frames = list(range(ds_gt_one.sizes["time"]))
+frames_to_select = np.array(list_frames)[
+    chunk_start:chunk_start + chunk_width:step_between_query_frames
+]  # every N frame
+print(frames_to_select)
+# --------------------
+
 # Prepare query points array
 # it has frame as first column
 queries_array = np.vstack(
     [
         np.hstack(
             [
-                f * np.ones((ds_gt.sizes["individuals"], 1)),  # frame column
-                ds_gt.position.sel(time=f).values.T,  # x, y columns
+                f
+                * np.ones((ds_gt_one.sizes["individuals"], 1)),  # frame column
+                ds_gt_one.position.sel(time=f).values.T,  # x, y columns
             ]
         )
-        for f in range(ds_gt.sizes["time"])
+        for f in range(ds_gt_one.sizes["time"])
     ]
 )
 
 # Remove rows with nans in position
 queries_array = queries_array[~np.any(np.isnan(queries_array), axis=1), :]
 
-# # Select frames
-# list_frames = list(range(ds_gt.sizes["time"]))
-# frames_to_select = np.array(list_frames)[
-#     ::step_between_query_frames
-# ]  # every second frame
-# queries_sel = queries_array[[col in frames_to_select for col in queries_array[:, 0]], :]
+# Filter selected query points
+queries_sel = queries_array[
+    [col in frames_to_select for col in queries_array[:, 0]], :
+]
+
+print(queries_sel.shape)
 
-# print(np.unique(queries_sel[:, 0]))
+
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Downsample queries by the same scale factor as the video
+queries_downsampled = queries_sel * scale_factor
+queries_downsampled[:, 0] = queries_sel[:, 0]  # frame index is not scaled
+print(queries_downsampled.shape)  # (n_queries, 3): frame, x, y
+print(queries_downsampled)
+
+# convert to torch tensor and place on device
+queries_downsampled = torch.tensor(queries_downsampled).to(torch.float).to(
+    DEFAULT_DEVICE
+)  # .half().to(device) torch.float16
 
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Read video
-# TODO: is it faster with sleap_io?
+# TODO: is it faster with sleap_io? yes! but then converting to torch is very slow
+# %time video_full = read_video_from_path(video_path)  # Wall time: 13.4 s
+# %time video_full = sio.load_video(video_path)  # Wall time: 27.4 ms
+# %time video_full = np.array(sio.load_video(video_path))
+
 video_full = read_video_from_path(video_path)
 print(type(video_full))
 print(video_full.shape)  # (614, 2160, 4096, 3)
 
-
-# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Make it a torch tensor with dimensions in expected order
+# as torch tensor
 video_full = torch.from_numpy(video_full).permute(0, 3, 1, 2)[None]
 
-print(video_full.shape)  # (1, 614, 3, 2160, 4096)
-print(video_full.device)
-
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Downsample frames
-# out_frame_size = [216, 410] # 108, 205
-# video = F.interpolate(video[0], out_frame_size, mode="bilinear")[None]
+# Downsample video
+video_downsampled = F.interpolate(
+    video_full[0], scale_factor=scale_factor, mode="bilinear"
+)[None]
+
+print(video_downsampled.shape)  # torch.Size([1, 614, 3, 540, 1024])
+print(video_downsampled.device)
 
-print(video_full.shape)  # (1, 614, 3, 2160, 4096)
-print(video_full.device)
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Select first part of the video only  (to fit in GPU)
 # video = video[:, : video.shape[1] // 8]
-chunk_start = 0
-video = video_full[:, chunk_start : chunk_start + 75, :, :, :]  # 75 frames
-print(video.shape)  # (1, 307, 3, 2160, 4096)
+video_downsampled_chunk = video_downsampled[
+    :, chunk_start : chunk_start + chunk_width, :, :, :
+]  # 75 frames
+print(video_downsampled_chunk.shape)  # torch.Size([1, 75, 3, 540, 1024])
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Convert to float and place video on device
@@ -110,61 +155,49 @@
 # video = video.float().to(device)
 # video = video.half().to(device) # Use half precision for memory efficiency
 # TODO: Make sure your video is normalized properly (video / 255.0) before converting to half()
-video = video.to(torch.float).to(device)  # torch.float16
-
-# %%
-# Check gpu memory usage
-# print(torch.cuda.memory_summary())
-# %%
-# Define query points
-queries = torch.tensor(
-    [
-        [0.0, 1070.1, 1697.1],
-        # if downsampled: [0.0, 97.09, 177.34],  # point tracked from the first frame
-        [0.0, 980.7, 1762.2],
-        # if downsampled: [0.0, 106.20, 170.33],
-        # [113.0, 1961.00, 1665.00]
-        # [10.0, 600.0, 500.0],  # frame number 10
-        # [20.0, 750.0, 600.0],  # ...
-        # [30.0, 900.0, 200.0],
-    ]
-)
+video_downsampled_chunk = video_downsampled_chunk.to(torch.float).to(
+    device
+)  # torch.float16
 
-# # Select all points at the first frame of the chunk
-# queries = queries_array[queries_array[:, 0] == chunk_start, :]  
-# queries = queries[:1, :]
-# queries = torch.tensor(queries)
 
-# Place query tensor on GPU
-queries = queries.to(torch.float).to(device)  # .half().to(device) torch.float16
-
-# %%
-# Visualize query points over frame
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Visualize query points over frames
 
 # Create a list of frame numbers corresponding to each point
-frame_numbers = queries[:, 0].int().unique().tolist()
+frame_numbers = queries_downsampled[:, 0].int().unique().tolist()
 
 for frame_number in frame_numbers:
-    # get the query points for the current frame
-    queries_one_frame = queries[queries[:, 0] == frame_number]
-
-    fig, ax = plt.subplots(1, 1)
-    # plot frame
-    ax.imshow(
-        video_full[frame_number, :, :]
-    )  # B T C H W -> H W C
-    # plot query points
-    ax.scatter(
-        x=queries_one_frame[:, 1].cpu(), y=queries_one_frame[:, 2].cpu(), s=5, c="red"
-    )
+    if frame_number in list(range(video_downsampled_chunk.shape[1])):
+        # get the query points for the current frame
+        queries_one_frame = queries_downsampled[
+            queries_downsampled[:, 0] == frame_number
+        ]
+
+        fig, ax = plt.subplots(1, 1)
+        # plot frame
+        ax.imshow(
+            video_downsampled_chunk.permute(0, 1, -2, -1, -3)[
+                0, frame_number, :, :, :
+            ]
+            .cpu()
+            .numpy()
+            .astype(np.int32)
+        )  # B T C H W -> H W C
+        # plot query points
+        ax.scatter(
+            x=queries_one_frame[:, 1].cpu(),
+            y=queries_one_frame[:, 2].cpu(),
+            s=5,
+            c="red",
+        )
 
-    ax.set_title("Frame {}".format(frame_number))
-    ax.set_xlim(0, video.shape[4])
-    ax.set_ylim(0, video.shape[3])
-    ax.invert_yaxis()
+        ax.set_title("Frame {}".format(frame_number))
+        ax.set_xlim(0, video_downsampled_chunk.shape[4])
+        ax.set_ylim(0, video_downsampled_chunk.shape[3])
+        ax.invert_yaxis()
 
 
-# %%
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Get Offline CoTracker model
 model = torch.hub.load("facebookresearch/co-tracker", "cotracker3_offline")
 
@@ -172,8 +205,9 @@
 # Note: this is for memory usage
 model = model.to(device)  # .half().to(device) # .to(torch.float16).to(device)
 
+print(model.model.window_len)
 
-# %%
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # all_half = all(p.dtype == torch.float16 for p in model.parameters())
 # print("All parameters are float16:", all_half)
 
@@ -187,10 +221,12 @@
 #     print(f"{name}: {buffer.dtype}")
 
 
-# %%
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Run CoTracker
 pred_tracks, pred_visibility = model(
-    video, queries=queries[None], backward_tracking=True
+    video_downsampled_chunk, 
+    queries=queries_downsampled[None], 
+    backward_tracking=True,
 )  # B T N 2,  B T N 1
 
 
@@ -206,26 +242,43 @@
 print(pred_tracks.shape)  # (1, 307, 2, 2) --> Batch, Time, N of points, 2 (x,y)
 print(pred_visibility.shape)
 
-
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Save as a movement dataset
+# Upsample results to the original video resolution
 
-# Assuming 1 query per individual
+pred_tracks_upsampled = pred_tracks / scale_factor  # back to full-res pixels
 
-# Prepare position array
+# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
+# Save as a movement dataset
 # (n_frames, n_space, n_keypoints, n_individuals)
-position_array = np.empty(
-    ds_gt.position.shape[:2] + (1,) + (ds_gt.position.shape[-1],)
-    # add keypoint dimension
-)
-position_array.fill(np.nan)
-position_array[150 : 150 + 75, :, :, :] = (
-    pred_tracks.permute(1, -1, 0, -2).cpu().numpy()
-)  
+
+# assuming 1 query is 1 individual
+position_array = (
+    pred_tracks_upsampled.permute(1, -1, 0, -2).cpu().numpy()
+)  # (T, 2, 1, Nqueries)
+visibility_array = pred_visibility.cpu().numpy()[0]  # (T, Nqueries)
+
+# set position to nan if non visible
+# (improve this)
+for i in range(visibility_array.shape[1]):
+    position_array[~visibility_array[:, i], :, :, i] = np.nan
+
+# -----------------------------
+# # get each track from its query point
+# position_array_fix = np.vstack(
+#     [
+#         position_array[
+#             frames_to_select[i]:(frames_to_select[i+1]
+#               if i<queries.shape[0]-1 else None), :, i
+#         ]
+#         for i in range(queries.shape[0])
+#     ]
+# )
+# position_array_fix = position_array_fix.T[None,None].T
+# --------------------------------------------
 
 ds = load_poses.from_numpy(
-    position_array=position_array,
-    individual_names=[f"ind_{i}" for i in range(pred_tracks.shape[2])],
+    position_array=position_array,  # position_array_fix,
+    individual_names=[f"ind_{i}" for i in range(position_array.shape[-1])],
     keypoint_names=["centroid"],
     source_software="CoTracker3",
 )
@@ -239,7 +292,7 @@
 
 save_poses.to_sleap_analysis_file(
     ds,
-    f"output/cotracker_offline_output_{timestamp}.h5",
+    f"../tap_models_crabs/output/cotracker_offline_output_{timestamp}.h5",
 )
 
 

From 70c1b03a34a607df2a0f0f9e2a1cd5966233dd77 Mon Sep 17 00:00:00 2001
From: sfmig <33267254+sfmig@users.noreply.github.com>
Date: Fri, 25 Apr 2025 12:14:08 +0100
Subject: [PATCH 04/12] Trying offline querying all individuals first frame

---
 MANIFEST.in                   |  1 +
 notebook_cotracker_offline.py | 74 +++++++++++++++--------------------
 notebook_cotracker_online.py  |  2 +-
 3 files changed, 33 insertions(+), 44 deletions(-)

diff --git a/MANIFEST.in b/MANIFEST.in
index d45a5b91..97086b6d 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -16,3 +16,4 @@ recursive-include ethology/annotations/json_schemas/schemas *.md
 
 # Temporarily include notebooks
 include notebook_cotracker_online.py
+include notebook_cotracker_offline.py
diff --git a/notebook_cotracker_offline.py b/notebook_cotracker_offline.py
index c18e43f9..ef424920 100644
--- a/notebook_cotracker_offline.py
+++ b/notebook_cotracker_offline.py
@@ -1,3 +1,5 @@
+"""Offline tracking with CoTracker3."""
+
 # %%
 # Imports
 import os
@@ -22,29 +24,32 @@
     else "cpu"
 )
 
-%matplotlib widget
+# %matplotlib widget
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Data paths
-video_path = "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test.mp4"
+video_path = (
+    "/home/sminano/swc/project_ethology/tap_models_crabs/"
+    "input/04.09.2023-04-Right_RE_test.mp4"
+)
 
 ground_truth_data = Path(
     "/home/sminano/swc/project_ethology/tap_models_crabs/input/04.09.2023-04-Right_RE_test_corrected_ST_SM_20241029_113207.csv"
 )
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
-# Parmeters
+# Parameters
 
 # query points
-step_between_query_frames = 5
-individuals_gt_ids = [57]
+step_between_query_frames: int = 1000
+individuals_gt_ids: list[int] = []
 
 # downsample video
-scale_factor = 0.25
+scale_factor: float = 0.25
 
 # clip video
-chunk_start = 0
-chunk_width = 75
+chunk_start: int = 0
+chunk_width = 100
 
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
@@ -69,7 +74,7 @@
 # Select frames
 list_frames = list(range(ds_gt_one.sizes["time"]))
 frames_to_select = np.array(list_frames)[
-    chunk_start:chunk_start + chunk_width:step_between_query_frames
+    chunk_start : chunk_start + chunk_width : step_between_query_frames
 ]  # every N frame
 print(frames_to_select)
 # --------------------
@@ -108,14 +113,15 @@
 print(queries_downsampled)
 
 # convert to torch tensor and place on device
-queries_downsampled = torch.tensor(queries_downsampled).to(torch.float).to(
-    DEFAULT_DEVICE
-)  # .half().to(device) torch.float16
+queries_downsampled_tensor: torch.Tensor = (
+    torch.from_numpy(queries_downsampled).to(torch.float).to(DEFAULT_DEVICE)
+)
 
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Read video
-# TODO: is it faster with sleap_io? yes! but then converting to torch is very slow
+# TODO: is it faster with sleap_io? yes! but then converting
+# to torch is very slow
 # %time video_full = read_video_from_path(video_path)  # Wall time: 13.4 s
 # %time video_full = sio.load_video(video_path)  # Wall time: 27.4 ms
 # %time video_full = np.array(sio.load_video(video_path))
@@ -148,13 +154,13 @@
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Convert to float and place video on device
 # Why do we need .float conversion?
-# chatgpt: Mathematical operations like convolutions, normalizations, or matrix mults expect float32 or float16
+# chatgpt: Mathematical operations like convolutions, normalizations,
+# or matrix mults expect float32 or float16
 
 
 device = "cuda"
 # video = video.float().to(device)
 # video = video.half().to(device) # Use half precision for memory efficiency
-# TODO: Make sure your video is normalized properly (video / 255.0) before converting to half()
 video_downsampled_chunk = video_downsampled_chunk.to(torch.float).to(
     device
 )  # torch.float16
@@ -164,13 +170,13 @@
 # Visualize query points over frames
 
 # Create a list of frame numbers corresponding to each point
-frame_numbers = queries_downsampled[:, 0].int().unique().tolist()
+frame_numbers = queries_downsampled_tensor[:, 0].unique().tolist()
 
 for frame_number in frame_numbers:
     if frame_number in list(range(video_downsampled_chunk.shape[1])):
         # get the query points for the current frame
-        queries_one_frame = queries_downsampled[
-            queries_downsampled[:, 0] == frame_number
+        queries_one_frame = queries_downsampled_tensor[
+            queries_downsampled_tensor[:, 0] == frame_number
         ]
 
         fig, ax = plt.subplots(1, 1)
@@ -191,7 +197,7 @@
             c="red",
         )
 
-        ax.set_title("Frame {}".format(frame_number))
+        ax.set_title(f"Frame {frame_number}")
         ax.set_xlim(0, video_downsampled_chunk.shape[4])
         ax.set_ylim(0, video_downsampled_chunk.shape[3])
         ax.invert_yaxis()
@@ -224,8 +230,8 @@
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Run CoTracker
 pred_tracks, pred_visibility = model(
-    video_downsampled_chunk, 
-    queries=queries_downsampled[None], 
+    video_downsampled_chunk,
+    queries=queries_downsampled_tensor[None],
     backward_tracking=True,
 )  # B T N 2,  B T N 1
 
@@ -239,13 +245,15 @@
 
 # %%
 # TODO: Can I upsample the results to the original video res?
-print(pred_tracks.shape)  # (1, 307, 2, 2) --> Batch, Time, N of points, 2 (x,y)
+print(
+    pred_tracks.shape
+)  # (1, 307, 2, 2) --> Batch, Time, N of points, 2 (x,y)
 print(pred_visibility.shape)
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Upsample results to the original video resolution
 
-pred_tracks_upsampled = pred_tracks*1 / scale_factor
+pred_tracks_upsampled = pred_tracks * 1 / scale_factor
 
 # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
 # Save as a movement dataset
@@ -294,23 +302,3 @@
     ds,
     f"../tap_models_crabs/output/cotracker_offline_output_{timestamp}.h5",
 )
-
-
-# %%
-# # Visualize results
-
-# vis = Visualizer(
-#     save_dir="./output",
-#     linewidth=1,
-#     mode="cool",
-#     tracks_leave_trace=-1,
-#     fps=10,
-# )
-
-# # Generate timestamp of today in format YYYYMMDD_HHMMSS
-# timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
-
-# # Save video with predictions
-# vis.visualize(video, pred_tracks, pred_visibility, filename=f"queries_{timestamp}")
-
-# %%
diff --git a/notebook_cotracker_online.py b/notebook_cotracker_online.py
index f9378f84..5896a09b 100644
--- a/notebook_cotracker_online.py
+++ b/notebook_cotracker_online.py
@@ -1,4 +1,4 @@
-"""Multi-window approach for online tracking with CoTracker3.
+"""Sliding-window approach for online tracking with CoTracker3.
 
 Todo:
 - more query points?

From f959ee90f858396dda3099a44b71832cd7813372 Mon Sep 17 00:00:00 2001
From: AnandMayank <anandmayank698@gmail.com>
Date: Wed, 18 Feb 2026 18:53:40 +0530
Subject: [PATCH 05/12] feat(ethology): add lightweight trajectory
 re-identification utility and BoxMOT-style model loading

- Add reid module with motion-based re-identification
- Refactor OSNet and TensorRT model loading to match BoxMOT approach
  (auto-download/cache)
- Add unit tests for reid utility

This closes the gap for robust ID handling in long ethology
recordings and improves reproducibility for all users.

---
 ethology/reid/__init__.py                     |   1 +
 ethology/reid/backbones/__init__.py           |   1 +
 ethology/reid/backbones/hacnn.py              | 292 ++++++++++++++
 ethology/reid/backbones/mlfn.py               | 239 ++++++++++++
 ethology/reid/backbones/mobilenetv2.py        | 247 ++++++++++++
 ethology/reid/backbones/osnet.py              | 345 +++++++++++++++++
 ethology/reid/backbones/osnet_ain.py          | 356 ++++++++++++++++++
 ethology/reid/backbones/resnet.py             | 273 ++++++++++++++
 ethology/reid/backends/__init__.py            |   1 +
 ethology/reid/backends/base_backend.py        | 170 +++++++++
 ethology/reid/backends/onnx_backend.py        |  31 ++
 ethology/reid/backends/openvino_backend.py    |  48 +++
 ethology/reid/backends/pytorch_backend.py     |  20 +
 ethology/reid/backends/tensorrt_backend.py    | 310 +++++++++++++++
 ethology/reid/backends/tflite_backend.py      |  40 ++
 ethology/reid/backends/torchscript_backend.py |  20 +
 ethology/reid/core/__init__.py                |   1 +
 ethology/reid/core/auto_backend.py            |  74 ++++
 ethology/reid/core/config.py                  |  16 +
 ethology/reid/core/factory.py                 |  30 ++
 ethology/reid/core/handler.py                 |  33 ++
 ethology/reid/core/registry.py                |  71 ++++
 ethology/reid/core/reid_handler.py            |  28 ++
 tests/test_unit/test_reid_handler.py          |  12 +
 24 files changed, 2659 insertions(+)
 create mode 100644 ethology/reid/__init__.py
 create mode 100644 ethology/reid/backbones/__init__.py
 create mode 100644 ethology/reid/backbones/hacnn.py
 create mode 100644 ethology/reid/backbones/mlfn.py
 create mode 100644 ethology/reid/backbones/mobilenetv2.py
 create mode 100644 ethology/reid/backbones/osnet.py
 create mode 100644 ethology/reid/backbones/osnet_ain.py
 create mode 100644 ethology/reid/backbones/resnet.py
 create mode 100644 ethology/reid/backends/__init__.py
 create mode 100644 ethology/reid/backends/base_backend.py
 create mode 100644 ethology/reid/backends/onnx_backend.py
 create mode 100644 ethology/reid/backends/openvino_backend.py
 create mode 100644 ethology/reid/backends/pytorch_backend.py
 create mode 100644 ethology/reid/backends/tensorrt_backend.py
 create mode 100644 ethology/reid/backends/tflite_backend.py
 create mode 100644 ethology/reid/backends/torchscript_backend.py
 create mode 100644 ethology/reid/core/__init__.py
 create mode 100644 ethology/reid/core/auto_backend.py
 create mode 100644 ethology/reid/core/config.py
 create mode 100644 ethology/reid/core/factory.py
 create mode 100644 ethology/reid/core/handler.py
 create mode 100644 ethology/reid/core/registry.py
 create mode 100644 ethology/reid/core/reid_handler.py
 create mode 100644 tests/test_unit/test_reid_handler.py

diff --git a/ethology/reid/__init__.py b/ethology/reid/__init__.py
new file mode 100644
index 00000000..d2f6d334
--- /dev/null
+++ b/ethology/reid/__init__.py
@@ -0,0 +1 @@
+# ReID module for ethology
diff --git a/ethology/reid/backbones/__init__.py b/ethology/reid/backbones/__init__.py
new file mode 100644
index 00000000..8e23892b
--- /dev/null
+++ b/ethology/reid/backbones/__init__.py
@@ -0,0 +1 @@
+# Backbones for ReID models
diff --git a/ethology/reid/backbones/hacnn.py b/ethology/reid/backbones/hacnn.py
new file mode 100644
index 00000000..e48ea2b7
--- /dev/null
+++ b/ethology/reid/backbones/hacnn.py
@@ -0,0 +1,292 @@
+# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+from __future__ import absolute_import, division
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["HACNN"]
+
+
+class ConvBlock(nn.Module):
+	def __init__(self, in_c, out_c, k, s=1, p=0):
+		super(ConvBlock, self).__init__()
+		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
+		self.bn = nn.BatchNorm2d(out_c)
+	def forward(self, x):
+		return F.relu(self.bn(self.conv(x)))
+
+class InceptionA(nn.Module):
+	def __init__(self, in_channels, out_channels):
+		super(InceptionA, self).__init__()
+		mid_channels = out_channels // 4
+		self.stream1 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+		)
+		self.stream2 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+		)
+		self.stream3 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+		)
+		self.stream4 = nn.Sequential(
+			nn.AvgPool2d(3, stride=1, padding=1),
+			ConvBlock(in_channels, mid_channels, 1),
+		)
+	def forward(self, x):
+		s1 = self.stream1(x)
+		s2 = self.stream2(x)
+		s3 = self.stream3(x)
+		s4 = self.stream4(x)
+		y = torch.cat([s1, s2, s3, s4], dim=1)
+		return y
+
+class InceptionB(nn.Module):
+	def __init__(self, in_channels, out_channels):
+		super(InceptionB, self).__init__()
+		mid_channels = out_channels // 4
+		self.stream1 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+		)
+		self.stream2 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+		)
+		self.stream3 = nn.Sequential(
+			nn.MaxPool2d(3, stride=2, padding=1),
+			ConvBlock(in_channels, mid_channels * 2, 1),
+		)
+	def forward(self, x):
+		s1 = self.stream1(x)
+		s2 = self.stream2(x)
+		s3 = self.stream3(x)
+		y = torch.cat([s1, s2, s3], dim=1)
+		return y
+
+class SpatialAttn(nn.Module):
+	def __init__(self):
+		super(SpatialAttn, self).__init__()
+		self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
+		self.conv2 = ConvBlock(1, 1, 1)
+	def forward(self, x):
+		x = x.mean(1, keepdim=True)
+		x = self.conv1(x)
+		x = F.interpolate(
+			x, (x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
+		)
+		x = self.conv2(x)
+		return x
+
+class ChannelAttn(nn.Module):
+	def __init__(self, in_channels, reduction_rate=16):
+		super(ChannelAttn, self).__init__()
+		assert in_channels % reduction_rate == 0
+		self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
+		self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
+	def forward(self, x):
+		x = F.avg_pool2d(x, x.size()[2:])
+		x = self.conv1(x)
+		x = self.conv2(x)
+		return x
+
+class SoftAttn(nn.Module):
+	def __init__(self, in_channels):
+		super(SoftAttn, self).__init__()
+		self.spatial_attn = SpatialAttn()
+		self.channel_attn = ChannelAttn(in_channels)
+		self.conv = ConvBlock(in_channels, in_channels, 1)
+	def forward(self, x):
+		y_spatial = self.spatial_attn(x)
+		y_channel = self.channel_attn(x)
+		y = y_spatial * y_channel
+		y = torch.sigmoid(self.conv(y))
+		return y
+
+class HardAttn(nn.Module):
+	def __init__(self, in_channels):
+		super(HardAttn, self).__init__()
+		self.fc = nn.Linear(in_channels, 4 * 2)
+		self.init_params()
+	def init_params(self):
+		self.fc.weight.data.zero_()
+		self.fc.bias.data.copy_(
+			torch.tensor([0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float)
+		)
+	def forward(self, x):
+		x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
+		theta = torch.tanh(self.fc(x))
+		theta = theta.view(-1, 4, 2)
+		return theta
+
+class HarmAttn(nn.Module):
+	def __init__(self, in_channels):
+		super(HarmAttn, self).__init__()
+		self.soft_attn = SoftAttn(in_channels)
+		self.hard_attn = HardAttn(in_channels)
+	def forward(self, x):
+		y_soft_attn = self.soft_attn(x)
+		theta = self.hard_attn(x)
+		return y_soft_attn, theta
+
+class HACNN(nn.Module):
+	def __init__(
+		self,
+		num_classes,
+		loss="softmax",
+		nchannels=(128, 256, 384),  # tuple: avoids a shared mutable default
+		feat_dim=512,
+		learn_region=True,
+		use_gpu=True,
+		**kwargs,
+	):
+		"""Build the global branch and, if ``learn_region``, the local branch.
+
+		``nchannels`` holds the widths of the three inception stages; ``feat_dim``
+		is the size of each branch embedding. ``**kwargs`` is accepted but unused.
+		"""
+		super(HACNN, self).__init__()
+		self.loss = loss
+		self.learn_region = learn_region
+		self.use_gpu = use_gpu
+		self.conv = ConvBlock(3, 32, 3, s=2, p=1)
+		self.inception1 = nn.Sequential(
+			InceptionA(32, nchannels[0]),
+			InceptionB(nchannels[0], nchannels[0]),
+		)
+		self.ha1 = HarmAttn(nchannels[0])
+		self.inception2 = nn.Sequential(
+			InceptionA(nchannels[0], nchannels[1]),
+			InceptionB(nchannels[1], nchannels[1]),
+		)
+		self.ha2 = HarmAttn(nchannels[1])
+		self.inception3 = nn.Sequential(
+			InceptionA(nchannels[1], nchannels[2]),
+			InceptionB(nchannels[2], nchannels[2]),
+		)
+		self.ha3 = HarmAttn(nchannels[2])
+		self.fc_global = nn.Sequential(
+			nn.Linear(nchannels[2], feat_dim), nn.BatchNorm1d(feat_dim), nn.ReLU()
+		)
+		self.classifier_global = nn.Linear(feat_dim, num_classes)
+		self.feat_dim = feat_dim
+		if self.learn_region:
+			self.init_scale_factors()
+			self.local_conv1 = InceptionB(32, nchannels[0])
+			self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
+			self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
+			self.fc_local = nn.Sequential(
+				nn.Linear(nchannels[2] * 4, feat_dim), nn.BatchNorm1d(feat_dim), nn.ReLU()
+			)
+			self.classifier_local = nn.Linear(feat_dim, num_classes)
+			self.feat_dim = feat_dim * 2  # global + local embeddings are concatenated
+	def init_scale_factors(self):
+		"""Create the four per-region 2x2 scale matrices (one tensor per region)."""
+		# Kept in a plain Python list rather than registered as buffers.
+		self.scale_factors = [
+			torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float) for _ in range(4)
+		]
+	def stn(self, x, theta):
+		"""Resample ``x`` on the affine grid given by ``theta`` (align_corners=False)."""
+		grid = F.affine_grid(theta, x.size(), align_corners=False)
+		return F.grid_sample(x, grid, align_corners=False)
+	def transform_theta(self, theta_i, region_idx):
+		"""Build the (B, 2, 3) affine matrix of one region from its (B, 2) shift."""
+		# Allocate on theta_i's device/dtype so CPU, CUDA and half precision all
+		# work whatever ``use_gpu`` is (theta used to be built on CPU in float32).
+		theta = theta_i.new_zeros(theta_i.size(0), 2, 3)
+		theta[:, :, :2] = self.scale_factors[region_idx].to(theta)
+		theta[:, :, -1] = theta_i
+		return theta
+	def forward(self, x):
+		assert (
+			x.size(2) == 160 and x.size(3) == 64
+		), "Input size does not match, expected (160, 64) but got ({}, {})".format(
+			x.size(2), x.size(3)
+		)
+		x = self.conv(x)
+		x1 = self.inception1(x)
+		x1_attn, x1_theta = self.ha1(x1)
+		x1_out = x1 * x1_attn
+		if self.learn_region:
+			x1_local_list = []
+			for region_idx in range(4):
+				x1_theta_i = x1_theta[:, region_idx, :]
+				x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
+				x1_trans_i = self.stn(x, x1_theta_i)
+				x1_trans_i = F.interpolate(
+					x1_trans_i, (24, 28), mode="bilinear", align_corners=True
+				)
+				x1_local_i = self.local_conv1(x1_trans_i)
+				x1_local_list.append(x1_local_i)
+		x2 = self.inception2(x1_out)
+		x2_attn, x2_theta = self.ha2(x2)
+		x2_out = x2 * x2_attn
+		if self.learn_region:
+			x2_local_list = []
+			for region_idx in range(4):
+				x2_theta_i = x2_theta[:, region_idx, :]
+				x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
+				x2_trans_i = self.stn(x1_out, x2_theta_i)
+				x2_trans_i = F.interpolate(
+					x2_trans_i, (12, 14), mode="bilinear", align_corners=True
+				)
+				x2_local_i = x2_trans_i + x1_local_list[region_idx]
+				x2_local_i = self.local_conv2(x2_local_i)
+				x2_local_list.append(x2_local_i)
+		x3 = self.inception3(x2_out)
+		x3_attn, x3_theta = self.ha3(x3)
+		x3_out = x3 * x3_attn
+		if self.learn_region:
+			x3_local_list = []
+			for region_idx in range(4):
+				x3_theta_i = x3_theta[:, region_idx, :]
+				x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
+				x3_trans_i = self.stn(x2_out, x3_theta_i)
+				x3_trans_i = F.interpolate(
+					x3_trans_i, (6, 7), mode="bilinear", align_corners=True
+				)
+				x3_local_i = x3_trans_i + x2_local_list[region_idx]
+				x3_local_i = self.local_conv3(x3_local_i)
+				x3_local_list.append(x3_local_i)
+		x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
+			x3_out.size(0), x3_out.size(1)
+		)
+		x_global = self.fc_global(x_global)
+		if self.learn_region:
+			x_local_list = []
+			for region_idx in range(4):
+				x_local_i = x3_local_list[region_idx]
+				x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
+					x_local_i.size(0), -1
+				)
+				x_local_list.append(x_local_i)
+			x_local = torch.cat(x_local_list, 1)
+			x_local = self.fc_local(x_local)
+		if not self.training:
+			if self.learn_region:
+				x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
+				x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
+				return torch.cat([x_global, x_local], 1)
+			else:
+				return x_global
+		prelogits_global = self.classifier_global(x_global)
+		if self.learn_region:
+			prelogits_local = self.classifier_local(x_local)
+		if self.loss == "softmax":
+			if self.learn_region:
+				return (prelogits_global, prelogits_local)
+			else:
+				return prelogits_global
+		elif self.loss == "triplet":
+			if self.learn_region:
+				return (prelogits_global, prelogits_local), (x_global, x_local)
+			else:
+				return prelogits_global, x_global
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
new file mode 100644
index 00000000..bb1e235c
--- /dev/null
+++ b/ethology/reid/backbones/mlfn.py
@@ -0,0 +1,239 @@
+# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+from __future__ import absolute_import, division
+import torch
+import torch.utils.model_zoo as model_zoo
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["mlfn"]
+model_urls = {
+	# training epoch = 5, top1 = 51.6
+	"imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
+}
+
+
+class MLFNBlock(nn.Module):
+	def __init__(self, in_channels, out_channels, stride, fsm_channels, groups=32):
+		super(MLFNBlock, self).__init__()
+		self.groups = groups
+		mid_channels = out_channels // 2
+
+		# Factor Modules
+		self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False)
+		self.fm_bn1 = nn.BatchNorm2d(mid_channels)
+		self.fm_conv2 = nn.Conv2d(
+			mid_channels,
+			mid_channels,
+			3,
+			stride=stride,
+			padding=1,
+			bias=False,
+			groups=self.groups,
+		)
+		self.fm_bn2 = nn.BatchNorm2d(mid_channels)
+		self.fm_conv3 = nn.Conv2d(mid_channels, out_channels, 1, bias=False)
+		self.fm_bn3 = nn.BatchNorm2d(out_channels)
+
+		# Factor Selection Module
+		self.fsm = nn.Sequential(
+			nn.AdaptiveAvgPool2d(1),
+			nn.Conv2d(in_channels, fsm_channels[0], 1),
+			nn.BatchNorm2d(fsm_channels[0]),
+			nn.ReLU(inplace=True),
+			nn.Conv2d(fsm_channels[0], fsm_channels[1], 1),
+			nn.BatchNorm2d(fsm_channels[1]),
+			nn.ReLU(inplace=True),
+			nn.Conv2d(fsm_channels[1], self.groups, 1),
+			nn.BatchNorm2d(self.groups),
+			nn.Sigmoid(),
+		)
+
+		self.downsample = None
+		if in_channels != out_channels or stride > 1:
+			self.downsample = nn.Sequential(
+				nn.Conv2d(in_channels, out_channels, 1, stride=stride, bias=False),
+				nn.BatchNorm2d(out_channels),
+			)
+
+	def forward(self, x):
+		residual = x
+		s = self.fsm(x)
+
+		# reduce dimension
+		x = self.fm_conv1(x)
+		x = self.fm_bn1(x)
+		x = F.relu(x, inplace=True)
+
+		# group convolution
+		x = self.fm_conv2(x)
+		x = self.fm_bn2(x)
+		x = F.relu(x, inplace=True)
+
+		# factor selection
+		b, c = x.size(0), x.size(1)
+		n = c // self.groups
+		ss = s.repeat(1, n, 1, 1)  # from (b, g, 1, 1) to (b, g*n=c, 1, 1)
+		ss = ss.view(b, n, self.groups, 1, 1)
+		ss = ss.permute(0, 2, 1, 3, 4).contiguous()
+		ss = ss.view(b, c, 1, 1)
+		x = ss * x
+
+		# recover dimension
+		x = self.fm_conv3(x)
+		x = self.fm_bn3(x)
+		x = F.relu(x, inplace=True)
+
+		if self.downsample is not None:
+			residual = self.downsample(residual)
+
+		return F.relu(residual + x, inplace=True), s
+
+
+class MLFN(nn.Module):
+	"""Multi-Level Factorisation Net.
+
+	Reference:
+		Chang et al. Multi-Level Factorisation Net for
+		Person Re-Identification. CVPR 2018.
+
+	Public keys:
+		- ``mlfn``: MLFN (Multi-Level Factorisation Net).
+	"""
+
+	def __init__(
+		self,
+		num_classes,
+		loss="softmax",
+		groups=32,
+		channels=(64, 256, 512, 1024, 2048),  # tuple: avoids a shared mutable default
+		embed_dim=1024,
+		**kwargs,
+	):
+		"""Assemble the stem, the MLFN blocks, both projections and the classifier."""
+		super(MLFN, self).__init__()
+		self.loss = loss
+		self.groups = groups
+
+		# first convolutional layer
+		self.conv1 = nn.Conv2d(3, channels[0], 7, stride=2, padding=3)
+		self.bn1 = nn.BatchNorm2d(channels[0])
+		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+
+		# main body
+		self.feature = nn.ModuleList(
+			[
+				# layer 1-3
+				MLFNBlock(channels[0], channels[1], 1, [128, 64], self.groups),
+				MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
+				MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
+				# layer 4-7
+				MLFNBlock(channels[1], channels[2], 2, [256, 128], self.groups),
+				MLFNBlock(channels[2], channels[2], 1, [256, 128], self.groups),
+				MLFNBlock(channels[2], channels[2], 1, [256, 128], self.groups),
+				MLFNBlock(channels[2], channels[2], 1, [256, 128], self.groups),
+				# layer 8-13
+				MLFNBlock(channels[2], channels[3], 2, [512, 128], self.groups),
+				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
+				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
+				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
+				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
+				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
+				# layer 14-16
+				MLFNBlock(channels[3], channels[4], 2, [512, 128], self.groups),
+				MLFNBlock(channels[4], channels[4], 1, [512, 128], self.groups),
+				MLFNBlock(channels[4], channels[4], 1, [512, 128], self.groups),
+			]
+		)
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+
+		# projection functions
+		self.fc_x = nn.Sequential(
+			nn.Conv2d(channels[4], embed_dim, 1, bias=False),
+			nn.BatchNorm2d(embed_dim),
+			nn.ReLU(inplace=True),
+		)
+		# fc_s consumes the selection vectors of all blocks (``groups`` channels each)
+		self.fc_s = nn.Sequential(
+			nn.Conv2d(self.groups * len(self.feature), embed_dim, 1, bias=False),
+			nn.BatchNorm2d(embed_dim),
+			nn.ReLU(inplace=True),
+		)
+		self.classifier = nn.Linear(embed_dim, num_classes)
+		self.init_params()
+
+	def init_params(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+
+	def forward(self, x):
+		x = self.conv1(x)
+		x = self.bn1(x)
+		x = F.relu(x, inplace=True)
+		x = self.maxpool(x)
+
+		s_hat = []
+		for block in self.feature:
+			x, s = block(x)
+			s_hat.append(s)
+		s_hat = torch.cat(s_hat, 1)
+
+		x = self.global_avgpool(x)
+		x = self.fc_x(x)
+		s_hat = self.fc_s(s_hat)
+
+		v = (x + s_hat) * 0.5
+		v = v.view(v.size(0), -1)
+
+		if not self.training:
+			return v
+
+		y = self.classifier(v)
+
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
+
+
+def init_pretrained_weights(model, model_url):
+	"""Initializes model with pretrained weights.
+
+	Layers that don't match with pretrained layers in name or size are kept unchanged.
+	"""
+	pretrain_dict = model_zoo.load_url(model_url)
+	model_dict = model.state_dict()
+	pretrain_dict = {
+		k: v
+		for k, v in pretrain_dict.items()
+		if k in model_dict and model_dict[k].size() == v.size()
+	}
+	model_dict.update(pretrain_dict)
+	model.load_state_dict(model_dict)
+
+
+def mlfn(num_classes, loss="softmax", pretrained=True, **kwargs):
+	"""Build an MLFN; ``pretrained`` only warns that weights need manual download."""
+	import warnings
+
+	net = MLFN(num_classes, loss, **kwargs)
+	if pretrained:
+		# No weights are loaded here: the automatic loader call is disabled.
+		url = model_urls["imagenet"]
+		warnings.warn(
+			f"The imagenet pretrained weights need to be manually downloaded from {url}"
+		)
+	return net
+# Copied from boxmot/boxmot/reid/backbones/mlfn.py
diff --git a/ethology/reid/backbones/mobilenetv2.py b/ethology/reid/backbones/mobilenetv2.py
new file mode 100644
index 00000000..35a16219
--- /dev/null
+++ b/ethology/reid/backbones/mobilenetv2.py
@@ -0,0 +1,247 @@
+# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+from __future__ import absolute_import, division
+
+import torch.utils.model_zoo as model_zoo
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["mobilenetv2_x1_0", "mobilenetv2_x1_4"]
+
+model_urls = {
+	# 1.0: top-1 71.3
+	"mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
+	# 1.4: top-1 73.9
+	"mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
+}
+
+
+class ConvBlock(nn.Module):
+	"""Basic convolutional block.
+
+	convolution (bias discarded) + batch normalization + relu6.
+
+	Args:
+		in_c (int): number of input channels.
+		out_c (int): number of output channels.
+		k (int or tuple): kernel size.
+		s (int or tuple): stride.
+		p (int or tuple): padding.
+		g (int): number of blocked connections from input channels
+			to output channels (default: 1).
+	"""
+
+	def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
+		super(ConvBlock, self).__init__()
+		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
+		self.bn = nn.BatchNorm2d(out_c)
+
+	def forward(self, x):
+		return F.relu6(self.bn(self.conv(x)))
+
+
+class Bottleneck(nn.Module):
+	def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
+		super(Bottleneck, self).__init__()
+		mid_channels = in_channels * expansion_factor
+		self.use_residual = stride == 1 and in_channels == out_channels
+		self.conv1 = ConvBlock(in_channels, mid_channels, 1)
+		self.dwconv2 = ConvBlock(
+			mid_channels, mid_channels, 3, stride, 1, g=mid_channels
+		)
+		self.conv3 = nn.Sequential(
+			nn.Conv2d(mid_channels, out_channels, 1, bias=False),
+			nn.BatchNorm2d(out_channels),
+		)
+
+	def forward(self, x):
+		m = self.conv1(x)
+		m = self.dwconv2(m)
+		m = self.conv3(m)
+		if self.use_residual:
+			return x + m
+		else:
+			return m
+
+
+class MobileNetV2(nn.Module):
+	"""MobileNetV2.
+
+	Reference:
+		Sandler et al. MobileNetV2: Inverted Residuals and
+		Linear Bottlenecks. CVPR 2018.
+
+	Public keys:
+		- ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
+		- ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
+	"""
+
+	def __init__(
+		self,
+		num_classes,
+		width_mult=1,
+		loss="softmax",
+		fc_dims=None,
+		dropout_p=None,
+		**kwargs,
+	):
+		super(MobileNetV2, self).__init__()
+		self.loss = loss
+		self.in_channels = int(32 * width_mult)
+		self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
+
+		# construct layers
+		self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
+		self.conv2 = self._make_layer(Bottleneck, 1, int(16 * width_mult), 1, 1)
+		self.conv3 = self._make_layer(Bottleneck, 6, int(24 * width_mult), 2, 2)
+		self.conv4 = self._make_layer(Bottleneck, 6, int(32 * width_mult), 3, 2)
+		self.conv5 = self._make_layer(Bottleneck, 6, int(64 * width_mult), 4, 2)
+		self.conv6 = self._make_layer(Bottleneck, 6, int(96 * width_mult), 3, 1)
+		self.conv7 = self._make_layer(Bottleneck, 6, int(160 * width_mult), 3, 2)
+		self.conv8 = self._make_layer(Bottleneck, 6, int(320 * width_mult), 1, 1)
+		self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
+
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc = self._construct_fc_layer(fc_dims, self.feature_dim, dropout_p)
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+
+		self._init_params()
+
+	def _make_layer(self, block, t, c, n, s):
+		"""Stack ``n`` blocks producing ``c`` channels and advance ``self.in_channels``.
+
+		``t`` is the expansion factor passed to every block; the stride ``s`` is
+		applied by the first block only, the remaining ones use the block default.
+		"""
+		head = block(self.in_channels, c, t, s)
+		self.in_channels = c
+		# Blocks are created in stacking order, as in a plain append loop.
+		tail = [block(c, c, t) for _ in range(n - 1)]
+		return nn.Sequential(head, *tail)
+
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+		"""Constructs fully connected layer.
+
+		Args:
+			fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
+			input_dim (int): input dimension
+			dropout_p (float): dropout probability, if None, dropout is unused
+		"""
+		if fc_dims is None:
+			self.feature_dim = input_dim
+			return None
+
+		assert isinstance(
+			fc_dims, (list, tuple)
+		), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
+
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU(inplace=True))
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+
+		self.feature_dim = fc_dims[-1]
+
+		return nn.Sequential(*layers)
+
+	def _init_params(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+
+	def featuremaps(self, x):
+		x = self.conv1(x)
+		x = self.conv2(x)
+		x = self.conv3(x)
+		x = self.conv4(x)
+		x = self.conv5(x)
+		x = self.conv6(x)
+		x = self.conv7(x)
+		x = self.conv8(x)
+		x = self.conv9(x)
+		return x
+
+	def forward(self, x):
+		f = self.featuremaps(x)
+		v = self.global_avgpool(f)
+		v = v.view(v.size(0), -1)
+
+		if self.fc is not None:
+			v = self.fc(v)
+
+		if not self.training:
+			return v
+
+		y = self.classifier(v)
+
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
+
+
+def init_pretrained_weights(model, model_url):
+	"""Initializes model with pretrained weights.
+
+	Layers that don't match with pretrained layers in name or size are kept unchanged.
+	"""
+	pretrain_dict = model_zoo.load_url(model_url)
+	model_dict = model.state_dict()
+	pretrain_dict = {
+		k: v
+		for k, v in pretrain_dict.items()
+		if k in model_dict and model_dict[k].size() == v.size()
+	}
+	model_dict.update(pretrain_dict)
+	model.load_state_dict(model_dict)
+
+
+def mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs):
+	"""Build MobileNetV2 x1.0; ``pretrained`` only warns about manual download."""
+	import warnings
+
+	net = MobileNetV2(
+		num_classes, loss=loss, width_mult=1, fc_dims=None, dropout_p=None, **kwargs
+	)
+	if pretrained:
+		# No weights are loaded here: the automatic loader call is disabled.
+		url = model_urls["mobilenetv2_x1_0"]
+		warnings.warn(
+			f"The imagenet pretrained weights need to be manually downloaded from {url}"
+		)
+	return net
+
+
+def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):
+	"""Build MobileNetV2 x1.4; ``pretrained`` only warns about manual download."""
+	import warnings
+
+	net = MobileNetV2(
+		num_classes, loss=loss, width_mult=1.4, fc_dims=None, dropout_p=None, **kwargs
+	)
+	if pretrained:
+		# No weights are loaded here: the automatic loader call is disabled.
+		url = model_urls["mobilenetv2_x1_4"]
+		warnings.warn(
+			f"The imagenet pretrained weights need to be manually downloaded from {url}"
+		)
+	return net
+# Copied from boxmot/boxmot/reid/backbones/mobilenetv2.py
diff --git a/ethology/reid/backbones/osnet.py b/ethology/reid/backbones/osnet.py
new file mode 100644
index 00000000..c07e4e45
--- /dev/null
+++ b/ethology/reid/backbones/osnet.py
@@ -0,0 +1,345 @@
+# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+from __future__ import absolute_import, division
+
+import warnings
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["osnet_x1_0", "osnet_x0_75", "osnet_x0_5", "osnet_x0_25", "osnet_ibn_x1_0"]
+
+pretrained_urls = {
+	"osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
+	"osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
+	"osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
+	"osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
+	"osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
+}
+
+# Building blocks used by OSNet below: ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, ChannelGate, OSBlock.
+
+class ConvLayer(nn.Module):
+	"""Bias-free convolution, then a norm layer, then ReLU.
+
+	The norm is affine ``InstanceNorm2d`` when ``IN`` is true, else
+	``BatchNorm2d``; it is stored as ``self.bn`` in both cases.
+	"""
+	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, groups=groups, bias=False)
+		self.bn = nn.InstanceNorm2d(out_channels, affine=True) if IN else nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		"""Return ``relu(bn(conv(x)))``."""
+		return self.relu(self.bn(self.conv(x)))
+
+class Conv1x1(nn.Module):
+	"""1x1 convolution (no bias) followed by BatchNorm2d and ReLU."""
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, groups=groups, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		"""Return ``relu(bn(conv(x)))``."""
+		normed = self.bn(self.conv(x))
+		return self.relu(normed)
+
+class Conv1x1Linear(nn.Module):
+	"""1x1 convolution (no bias) plus BatchNorm2d, with no activation."""
+	def __init__(self, in_channels, out_channels, stride=1):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+	def forward(self, x):
+		"""Return ``bn(conv(x))``."""
+		return self.bn(self.conv(x))
+
+class Conv3x3(nn.Module):
+	"""3x3 convolution (padding 1, no bias) followed by BatchNorm2d and ReLU."""
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		"""Return ``relu(bn(conv(x)))``."""
+		normed = self.bn(self.conv(x))
+		return self.relu(normed)
+
+class LightConv3x3(nn.Module):
+	"""Pointwise 1x1 conv, depthwise 3x3 conv, then BatchNorm2d and ReLU.
+
+	Both convolutions are bias-free and have no nonlinearity between them.
+	"""
+	def __init__(self, in_channels, out_channels):
+		super().__init__()
+		self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
+		self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, groups=out_channels, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		return self.relu(self.bn(self.conv2(self.conv1(x))))
+
+class ChannelGate(nn.Module):
+	"""Channel-wise gate computed from globally pooled features.
+
+	Pools to 1x1, applies two 1x1 convs with a ReLU between them (bottleneck
+	of ``in_channels // reduction``), and rescales the input by the result.
+	With ``return_gates=True`` the gate values are returned instead.
+	"""
+	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
+		super().__init__()
+		hidden = in_channels // reduction
+		num_gates = in_channels if num_gates is None else num_gates
+		self.return_gates = return_gates
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc1 = nn.Conv2d(in_channels, hidden, kernel_size=1, bias=True, padding=0)
+		self.norm1 = nn.LayerNorm((hidden, 1, 1)) if layer_norm else None
+		self.relu = nn.ReLU(inplace=True)
+		self.fc2 = nn.Conv2d(hidden, num_gates, kernel_size=1, bias=True, padding=0)
+		if gate_activation not in ("sigmoid", "relu", "linear"):
+			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
+		# "linear" means the raw fc2 output is used as the gate.
+		self.gate_activation = None
+		if gate_activation == "sigmoid":
+			self.gate_activation = nn.Sigmoid()
+		elif gate_activation == "relu":
+			self.gate_activation = nn.ReLU(inplace=True)
+	def forward(self, x):
+		gates = self.fc1(self.global_avgpool(x))
+		if self.norm1 is not None:
+			gates = self.norm1(gates)
+		gates = self.fc2(self.relu(gates))
+		if self.gate_activation is not None:
+			gates = self.gate_activation(gates)
+		# Broadcast the per-channel gates over the spatial dimensions.
+		return gates if self.return_gates else x * gates
+
+class OSBlock(nn.Module):
+	"""Omni-scale residual block with four parallel LightConv3x3 streams.
+
+	Streams stack 1, 2, 3 and 4 LightConv3x3 layers; their outputs pass
+	through one shared ChannelGate and are summed before the 1x1 projection.
+	A 1x1 projection is applied to the shortcut when channel counts differ,
+	and affine InstanceNorm2d is applied to the sum when ``IN`` is true.
+	"""
+	def __init__(self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs):
+		super().__init__()
+		mid_channels = out_channels // bottleneck_reduction
+
+		def stream(depth):
+			# ``depth`` stacked LightConv3x3 layers at the bottleneck width.
+			return nn.Sequential(*[LightConv3x3(mid_channels, mid_channels) for _ in range(depth)])
+
+		self.conv1 = Conv1x1(in_channels, mid_channels)
+		# The single-layer stream is a bare LightConv3x3, not a Sequential.
+		self.conv2a = LightConv3x3(mid_channels, mid_channels)
+		self.conv2b = stream(2)
+		self.conv2c = stream(3)
+		self.conv2d = stream(4)
+		# One gate instance is shared by all four streams.
+		self.gate = ChannelGate(mid_channels)
+		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+		needs_projection = in_channels != out_channels
+		self.downsample = Conv1x1Linear(in_channels, out_channels) if needs_projection else None
+		self.IN = nn.InstanceNorm2d(out_channels, affine=True) if IN else None
+	def forward(self, x):
+		"""Return ``relu(conv3(sum of gated streams) + shortcut)``."""
+		shortcut = x
+		reduced = self.conv1(x)
+		branches = (self.conv2a, self.conv2b, self.conv2c, self.conv2d)
+		# Left-to-right accumulation, matching a + b + c + d.
+		fused = self.gate(branches[0](reduced))
+		for branch in branches[1:]:
+			fused = fused + self.gate(branch(reduced))
+		projected = self.conv3(fused)
+		if self.downsample is not None:
+			shortcut = self.downsample(shortcut)
+		merged = projected + shortcut
+		if self.IN is not None:
+			merged = self.IN(merged)
+		return F.relu(merged)
+
+class OSNet(nn.Module):  # OSNet backbone: stem conv, three block stages, 1x1 conv, pooled feature vector, classifier
+	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", IN=False, **kwargs):
+		super(OSNet, self).__init__()
+		num_blocks = len(blocks)
+		assert num_blocks == len(layers)
+		assert num_blocks == len(channels) - 1  # channels[0] is the stem width; one entry per stage follows
+		self.loss = loss
+		self.feature_dim = feature_dim
+		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)  # stem: 7x7 conv, stride 2, 3 input channels
+		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN)  # IN reaches only the stem and this stage
+		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True)
+		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False)  # last stage: no pooling appended
+		self.conv5 = Conv1x1(channels[3], channels[3])
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)  # also (re)sets self.feature_dim
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+		self._init_params()
+	def _make_layer(self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False):  # one stage made of `layer` blocks
+		layers = []
+		layers.append(block(in_channels, out_channels, IN=IN))
+		for i in range(1, layer):  # the first block was appended above
+			layers.append(block(out_channels, out_channels, IN=IN))
+		if reduce_spatial_size:
+			layers.append(nn.Sequential(Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2)))  # 1x1 conv, then 2x2 average pooling
+		return nn.Sequential(*layers)
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):  # Linear -> BatchNorm1d -> ReLU [-> Dropout] per entry of fc_dims
+		if fc_dims is None or fc_dims < 0:  # NOTE(review): a list/tuple fc_dims reaches `< 0` before the int check below
+			self.feature_dim = input_dim
+			return None
+		if isinstance(fc_dims, int):
+			fc_dims = [fc_dims]
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU(inplace=True))
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+		self.feature_dim = fc_dims[-1]
+		return nn.Sequential(*layers)
+	def _init_params(self):  # re-initialises every Conv2d, BatchNorm2d, BatchNorm1d and Linear submodule
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+	def featuremaps(self, x):  # convolutional trunk only: conv1 -> maxpool -> conv2..conv5
+		x = self.conv1(x)
+		x = self.maxpool(x)
+		x = self.conv2(x)
+		x = self.conv3(x)
+		x = self.conv4(x)
+		x = self.conv5(x)
+		return x
+	def forward(self, x, return_featuremaps=False):
+		x = self.featuremaps(x)
+		if return_featuremaps:  # caller asked for the unpooled feature maps
+			return x
+		v = self.global_avgpool(x)
+		v = v.view(v.size(0), -1)  # flatten to (batch, -1)
+		if self.fc is not None:
+			v = self.fc(v)
+		if not self.training:  # eval mode: return the feature vector, skip the classifier
+			return v
+		y = self.classifier(v)
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v  # classifier output and feature vector
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
+
+def init_pretrained_weights(model, key=""):
+	"""Load ImageNet-pretrained OSNet weights for ``key`` into ``model``.
+
+	Looks for ``<key>_imagenet.pth`` in the ``models`` directory three levels
+	above this file, then in ``<torch home>/checkpoints``; if neither exists
+	the file is downloaded with gdown into the torch cache. Only tensors whose
+	name (minus a leading ``module.``) and size match the model are loaded.
+
+	Args:
+		model: Module whose state dict is updated.
+		key: Variant name; selects the file name and the ``pretrained_urls`` entry.
+	"""
+	import os
+	from collections import OrderedDict
+	import gdown
+	def _get_torch_home():
+		# $TORCH_HOME, else $XDG_CACHE_HOME/torch, else ~/.cache/torch.
+		xdg_cache = os.getenv("XDG_CACHE_HOME", "~/.cache")
+		return os.path.expanduser(os.getenv("TORCH_HOME", os.path.join(xdg_cache, "torch")))
+	filename = key + "_imagenet.pth"
+	# Optional local override directory; it is only read, never created here,
+	# so a read-only install location does not make loading fail.
+	ethology_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
+	local_file = os.path.join(ethology_root, "models", filename)
+	model_dir = os.path.join(_get_torch_home(), "checkpoints")
+	os.makedirs(model_dir, exist_ok=True)
+	cached_file = os.path.join(model_dir, filename)
+	if os.path.exists(local_file):
+		print(f"[OSNet] Loading model weights from {local_file}")
+		cached_file = local_file
+	elif os.path.exists(cached_file):
+		print(f"[OSNet] Loading model weights from {cached_file}")
+	else:
+		print(f"[OSNet] Downloading model weights to {cached_file}")
+		gdown.download(pretrained_urls[key], cached_file, quiet=False)
+	# Deserialize onto the CPU so checkpoints saved from a GPU also load on
+	# machines without CUDA; load_state_dict copies into the model's tensors.
+	state_dict = torch.load(cached_file, map_location="cpu")
+	model_dict = model.state_dict()
+	new_state_dict = OrderedDict()
+	matched_layers, discarded_layers = [], []
+	for k, v in state_dict.items():
+		if k.startswith("module."):
+			# Drop the 7-character "module." prefix (as added by nn.DataParallel wrappers).
+			k = k[7:]
+		if k in model_dict and model_dict[k].size() == v.size():
+			new_state_dict[k] = v
+			matched_layers.append(k)
+		else:
+			discarded_layers.append(k)
+	# Missing or shape-mismatched entries keep the model's current values.
+	model_dict.update(new_state_dict)
+	model.load_state_dict(model_dict)
+	if not matched_layers:
+		warnings.warn(
+			'The pretrained weights from "{}" cannot be loaded, '
+			"please check the key names manually "
+			"(** ignored and continue **)".format(cached_file)
+		)
+		return
+	print('Successfully loaded imagenet pretrained weights from "{}"'.format(cached_file))
+	if discarded_layers:
+		print(
+			"** The following layers are discarded "
+			"due to unmatched keys or layer size: {}".format(discarded_layers)
+		)
+
+def _build_osnet(key, channels, num_classes, pretrained, loss, kwargs, **fixed):
+	"""Build a three-stage OSNet (two OSBlocks per stage) at the given ``channels``.
+
+	Loads the weights registered under ``key`` when ``pretrained`` is truthy.
+	"""
+	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=channels, loss=loss, **fixed, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key=key)
+	return model
+
+def osnet_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet x1.0: channels 64/256/384/512."""
+	return _build_osnet("osnet_x1_0", [64, 256, 384, 512], num_classes, pretrained, loss, kwargs)
+
+def osnet_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet x0.75: channels 48/192/288/384."""
+	return _build_osnet("osnet_x0_75", [48, 192, 288, 384], num_classes, pretrained, loss, kwargs)
+
+def osnet_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet x0.5: channels 32/128/192/256."""
+	return _build_osnet("osnet_x0_5", [32, 128, 192, 256], num_classes, pretrained, loss, kwargs)
+
+def osnet_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet x0.25: channels 16/64/96/128."""
+	return _build_osnet("osnet_x0_25", [16, 64, 96, 128], num_classes, pretrained, loss, kwargs)
+
+def osnet_ibn_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet-IBN x1.0: channels 64/256/384/512, built with ``IN=True``."""
+	return _build_osnet("osnet_ibn_x1_0", [64, 256, 384, 512], num_classes, pretrained, loss, kwargs, IN=True)
diff --git a/ethology/reid/backbones/osnet_ain.py b/ethology/reid/backbones/osnet_ain.py
new file mode 100644
index 00000000..9e052209
--- /dev/null
+++ b/ethology/reid/backbones/osnet_ain.py
@@ -0,0 +1,356 @@
+# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+from __future__ import absolute_import, division
+
+import warnings
+
+import torch
+from torch import nn
+from torch.nn import functional as F
+
+__all__ = ["osnet_ain_x1_0", "osnet_ain_x0_75", "osnet_ain_x0_5", "osnet_ain_x0_25"]
+
+pretrained_urls = {
+	"osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
+	"osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
+	"osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
+	"osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
+}
+
+# Definitions below: ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, LightConvStream, ChannelGate, OSBlock, OSBlockINin, OSNet, init_pretrained_weights, and the osnet_ain_* factory functions.
+
+class ConvLayer(nn.Module):
+	"""Bias-free convolution, a norm layer, then ReLU.
+
+	``self.bn`` is affine InstanceNorm2d when ``IN`` is true, else BatchNorm2d.
+	"""
+	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, groups=groups, bias=False)
+		self.bn = nn.InstanceNorm2d(out_channels, affine=True) if IN else nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		normed = self.bn(self.conv(x))
+		return self.relu(normed)
+
+class Conv1x1(nn.Module):
+	"""1x1 convolution (no bias) followed by BatchNorm2d and ReLU."""
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, groups=groups, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		normed = self.bn(self.conv(x))
+		return self.relu(normed)
+
+class Conv1x1Linear(nn.Module):
+	"""1x1 convolution (no bias), optionally followed by BatchNorm2d; no activation.
+
+	With ``bn=False`` the ``self.bn`` attribute is ``None`` and is skipped.
+	"""
+	def __init__(self, in_channels, out_channels, stride=1, bn=True):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=0, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels) if bn else None
+	def forward(self, x):
+		out = self.conv(x)
+		return out if self.bn is None else self.bn(out)
+
+class Conv3x3(nn.Module):
+	"""3x3 convolution (padding 1, no bias) followed by BatchNorm2d and ReLU."""
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super().__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, groups=groups, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		normed = self.bn(self.conv(x))
+		return self.relu(normed)
+
+class LightConv3x3(nn.Module):
+	"""Pointwise 1x1 conv, depthwise 3x3 conv, then BatchNorm2d and ReLU (convs are bias-free)."""
+	def __init__(self, in_channels, out_channels):
+		super().__init__()
+		self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0, bias=False)
+		self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, groups=out_channels, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		# Pointwise then depthwise; no activation between the two convolutions.
+		depthwise = self.conv2(self.conv1(x))
+		return self.relu(self.bn(depthwise))
+
+class LightConvStream(nn.Module):
+	"""A stack of ``depth`` LightConv3x3 layers; only the first changes the channel count."""
+	def __init__(self, in_channels, out_channels, depth):
+		super().__init__()
+		assert depth >= 1
+		stack = [LightConv3x3(in_channels, out_channels)]
+		stack.extend(LightConv3x3(out_channels, out_channels) for _ in range(depth - 1))
+		self.layers = nn.Sequential(*stack)
+	def forward(self, x):
+		return self.layers(x)
+
+class ChannelGate(nn.Module):
+	"""Channel-wise gate computed from globally pooled features.
+
+	Pools to 1x1, applies two 1x1 convs with a ReLU between them (bottleneck
+	of ``in_channels // reduction``), and rescales the input by the result.
+	With ``return_gates=True`` the gate values are returned instead.
+	"""
+	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
+		super().__init__()
+		hidden = in_channels // reduction
+		num_gates = in_channels if num_gates is None else num_gates
+		self.return_gates = return_gates
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc1 = nn.Conv2d(in_channels, hidden, kernel_size=1, bias=True, padding=0)
+		self.norm1 = nn.LayerNorm((hidden, 1, 1)) if layer_norm else None
+		self.relu = nn.ReLU()
+		self.fc2 = nn.Conv2d(hidden, num_gates, kernel_size=1, bias=True, padding=0)
+		if gate_activation not in ("sigmoid", "relu", "linear"):
+			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
+		# "linear" means the raw fc2 output is used as the gate.
+		self.gate_activation = None
+		if gate_activation == "sigmoid":
+			self.gate_activation = nn.Sigmoid()
+		elif gate_activation == "relu":
+			self.gate_activation = nn.ReLU()
+	def forward(self, x):
+		gates = self.fc1(self.global_avgpool(x))
+		if self.norm1 is not None:
+			gates = self.norm1(gates)
+		gates = self.fc2(self.relu(gates))
+		if self.gate_activation is not None:
+			gates = self.gate_activation(gates)
+		# Broadcast the per-channel gates over the spatial dimensions.
+		return gates if self.return_gates else x * gates
+
+class OSBlock(nn.Module):
+	"""Omni-scale residual block with ``T`` LightConvStream branches of depth 1..T.
+
+	Branch outputs share one ChannelGate and are summed before the 1x1
+	projection; the shortcut is projected when channel counts differ.
+	"""
+	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+		super().__init__()
+		assert T >= 1
+		assert out_channels >= reduction and out_channels % reduction == 0
+		mid_channels = out_channels // reduction
+		self.conv1 = Conv1x1(in_channels, mid_channels)
+		streams = [LightConvStream(mid_channels, mid_channels, depth) for depth in range(1, T + 1)]
+		self.conv2 = nn.ModuleList(streams)
+		self.gate = ChannelGate(mid_channels)
+		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+		same_width = in_channels == out_channels
+		self.downsample = None if same_width else Conv1x1Linear(in_channels, out_channels)
+	def forward(self, x):
+		reduced = self.conv1(x)
+		# sum() starts from 0, so this is 0 + gate(s1) + gate(s2) + ...
+		fused = sum(self.gate(stream(reduced)) for stream in self.conv2)
+		projected = self.conv3(fused)
+		shortcut = x if self.downsample is None else self.downsample(x)
+		return F.relu(projected + shortcut)
+
+class OSBlockINin(nn.Module):
+	"""OSBlock variant that instance-normalises the residual branch.
+
+	The 1x1 projection ``conv3`` has no BatchNorm; affine InstanceNorm2d is
+	applied to its output before the shortcut is added.
+	"""
+	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+		super().__init__()
+		assert T >= 1
+		assert out_channels >= reduction and out_channels % reduction == 0
+		mid_channels = out_channels // reduction
+		self.conv1 = Conv1x1(in_channels, mid_channels)
+		streams = [LightConvStream(mid_channels, mid_channels, depth) for depth in range(1, T + 1)]
+		self.conv2 = nn.ModuleList(streams)
+		self.gate = ChannelGate(mid_channels)
+		self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
+		same_width = in_channels == out_channels
+		self.downsample = None if same_width else Conv1x1Linear(in_channels, out_channels)
+		self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+	def forward(self, x):
+		reduced = self.conv1(x)
+		# sum() starts from 0, so this is 0 + gate(s1) + gate(s2) + ...
+		fused = sum(self.gate(stream(reduced)) for stream in self.conv2)
+		# Normalise the residual branch only; the shortcut bypasses IN.
+		normed = self.IN(self.conv3(fused))
+		shortcut = x if self.downsample is None else self.downsample(x)
+		return F.relu(normed + shortcut)
+
+class OSNet(nn.Module):  # OSNet-AIN backbone: stem conv, three block stages with pooling between them, 1x1 conv, pooled feature vector, classifier
+	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", conv1_IN=False, **kwargs):
+		super(OSNet, self).__init__()
+		num_blocks = len(blocks)
+		assert num_blocks == len(layers)
+		assert num_blocks == len(channels) - 1  # channels[0] is the stem width; one entry per stage follows
+		self.loss = loss
+		self.feature_dim = feature_dim
+		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=conv1_IN)  # stem: 7x7 conv, stride 2, 3 input channels
+		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1])
+		self.pool2 = nn.Sequential(Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2))  # 1x1 conv, then 2x2 average pooling
+		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2])
+		self.pool3 = nn.Sequential(Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2))
+		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3])  # no pooling module follows the last stage
+		self.conv5 = Conv1x1(channels[3], channels[3])
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)  # also (re)sets self.feature_dim
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+		self._init_params()
+	def _make_layer(self, blocks, layer, in_channels, out_channels):  # `blocks` is a sequence of block classes; `layer` is not used here
+		layers = []
+		layers += [blocks[0](in_channels, out_channels)]
+		for i in range(1, len(blocks)):  # stage depth comes from len(blocks)
+			layers += [blocks[i](out_channels, out_channels)]
+		return nn.Sequential(*layers)
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):  # Linear -> BatchNorm1d -> ReLU [-> Dropout] per entry of fc_dims
+		if fc_dims is None or fc_dims < 0:  # NOTE(review): a list/tuple fc_dims reaches `< 0` before the int check below
+			self.feature_dim = input_dim
+			return None
+		if isinstance(fc_dims, int):
+			fc_dims = [fc_dims]
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU())
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+		self.feature_dim = fc_dims[-1]
+		return nn.Sequential(*layers)
+	def _init_params(self):  # re-initialises every Conv2d, BatchNorm2d, BatchNorm1d, InstanceNorm2d and Linear submodule
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.InstanceNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+	def featuremaps(self, x):  # convolutional trunk only: conv1 -> maxpool -> conv2/pool2 -> conv3/pool3 -> conv4 -> conv5
+		x = self.conv1(x)
+		x = self.maxpool(x)
+		x = self.conv2(x)
+		x = self.pool2(x)
+		x = self.conv3(x)
+		x = self.pool3(x)
+		x = self.conv4(x)
+		x = self.conv5(x)
+		return x
+	def forward(self, x, return_featuremaps=False):
+		x = self.featuremaps(x)
+		if return_featuremaps:  # caller asked for the unpooled feature maps
+			return x
+		v = self.global_avgpool(x)
+		v = v.view(v.size(0), -1)  # flatten to (batch, -1)
+		if self.fc is not None:
+			v = self.fc(v)
+		if not self.training:  # eval mode: return the feature vector, skip the classifier
+			return v
+		y = self.classifier(v)
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v  # classifier output and feature vector
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
+
+def init_pretrained_weights(model, key=""):
+	"""Load ImageNet-pretrained OSNet-AIN weights for ``key`` into ``model``.
+
+	The checkpoint ``<key>_imagenet.pth`` is read from ``<torch home>/checkpoints``
+	and downloaded there with gdown first if it is missing. Only tensors whose
+	name (minus a leading ``module.``) and size match the model are loaded.
+
+	Args:
+		model: Module whose state dict is updated.
+		key: Variant name; selects the file name and the ``pretrained_urls`` entry.
+	"""
+	import os
+	from collections import OrderedDict
+	import gdown
+	def _get_torch_home():
+		# $TORCH_HOME, else $XDG_CACHE_HOME/torch, else ~/.cache/torch.
+		xdg_cache = os.getenv("XDG_CACHE_HOME", "~/.cache")
+		return os.path.expanduser(os.getenv("TORCH_HOME", os.path.join(xdg_cache, "torch")))
+	model_dir = os.path.join(_get_torch_home(), "checkpoints")
+	# exist_ok replaces the manual errno.EEXIST check.
+	os.makedirs(model_dir, exist_ok=True)
+	filename = key + "_imagenet.pth"
+	cached_file = os.path.join(model_dir, filename)
+	if not os.path.exists(cached_file):
+		gdown.download(pretrained_urls[key], cached_file, quiet=False)
+	# Deserialize onto the CPU so checkpoints saved from a GPU also load on
+	# machines without CUDA; load_state_dict copies into the model's tensors.
+	state_dict = torch.load(cached_file, map_location="cpu")
+	model_dict = model.state_dict()
+	new_state_dict = OrderedDict()
+	matched_layers, discarded_layers = [], []
+	for k, v in state_dict.items():
+		if k.startswith("module."):
+			# Drop the 7-character "module." prefix (as added by nn.DataParallel wrappers).
+			k = k[7:]
+		if k in model_dict and model_dict[k].size() == v.size():
+			new_state_dict[k] = v
+			matched_layers.append(k)
+		else:
+			discarded_layers.append(k)
+	# Missing or shape-mismatched entries keep the model's current values.
+	model_dict.update(new_state_dict)
+	model.load_state_dict(model_dict)
+	if not matched_layers:
+		warnings.warn(
+			'The pretrained weights from "{}" cannot be loaded, '
+			"please check the key names manually "
+			"(** ignored and continue **)".format(cached_file)
+		)
+		return
+	print(
+		'Successfully loaded imagenet pretrained weights from "{}"'.format(
+			cached_file
+		)
+	)
+	if discarded_layers:
+		print(
+			"** The following layers are discarded "
+			"due to unmatched keys or layer size: {}".format(discarded_layers)
+		)
+
+def _build_osnet_ain(key, channels, num_classes, pretrained, loss, kwargs):
+	"""Build the shared OSNet-AIN layout at the given ``channels``; load the weights for ``key`` when ``pretrained`` is truthy."""
+	blocks = [[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]]
+	model = OSNet(num_classes, blocks=blocks, layers=[2, 2, 2], channels=channels, loss=loss, conv1_IN=True, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key=key)
+	return model
+
+def osnet_ain_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet-AIN x1.0: channels 64/256/384/512."""
+	return _build_osnet_ain("osnet_ain_x1_0", [64, 256, 384, 512], num_classes, pretrained, loss, kwargs)
+
+def osnet_ain_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet-AIN x0.75: channels 48/192/288/384."""
+	return _build_osnet_ain("osnet_ain_x0_75", [48, 192, 288, 384], num_classes, pretrained, loss, kwargs)
+
+def osnet_ain_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet-AIN x0.5: channels 32/128/192/256."""
+	return _build_osnet_ain("osnet_ain_x0_5", [32, 128, 192, 256], num_classes, pretrained, loss, kwargs)
+
+def osnet_ain_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	"""OSNet-AIN x0.25: channels 16/64/96/128."""
+	return _build_osnet_ain("osnet_ain_x0_25", [16, 64, 96, 128], num_classes, pretrained, loss, kwargs)
diff --git a/ethology/reid/backbones/resnet.py b/ethology/reid/backbones/resnet.py
new file mode 100644
index 00000000..12ca5639
--- /dev/null
+++ b/ethology/reid/backbones/resnet.py
@@ -0,0 +1,273 @@
+# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+"""
+Code source: https://github.com/pytorch/vision
+"""
+from __future__ import absolute_import, division
+
+import torch.utils.model_zoo as model_zoo
+from torch import nn
+
+__all__ = [
+	"resnet18",
+	"resnet34",
+	"resnet50",
+	"resnet101",
+	"resnet152",
+	"resnext50_32x4d",
+	"resnext101_32x8d",
+	"resnet50_fc512",
+]
+
+model_urls = {
+	"resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
+	"resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
+	"resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
+	"resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
+	"resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
+	"resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
+	"resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
+}
+
+# Definitions below: conv3x3, conv1x1, BasicBlock, Bottleneck, ResNet, init_pretrained_weights, and the model factory functions.
+
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+	"""3x3 convolution without bias.
+
+	Padding equals ``dilation``, so with stride 1 the spatial size of the
+	input is preserved.
+	"""
+	same_padding = dilation
+	return nn.Conv2d(
+		in_planes, out_planes, kernel_size=3, stride=stride,
+		padding=same_padding, dilation=dilation, groups=groups, bias=False,
+	)
+
+def conv1x1(in_planes, out_planes, stride=1):  # 1x1 convolution without bias
+	return nn.Conv2d(in_channels=in_planes, out_channels=out_planes, kernel_size=1, stride=stride, bias=False)
+
+class BasicBlock(nn.Module):
+	"""Residual block of two 3x3 convolutions (``expansion = 1``).
+
+	Only ``groups=1``, ``base_width=64`` and ``dilation=1`` are accepted.
+	"""
+	expansion = 1
+	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
+		super().__init__()
+		norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
+		if not (groups == 1 and base_width == 64):
+			raise ValueError("BasicBlock only supports groups=1 and base_width=64")
+		if dilation > 1:
+			raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+		# Any spatial downsampling happens in conv1 via ``stride``.
+		self.conv1 = conv3x3(inplanes, planes, stride)
+		self.bn1 = norm_layer(planes)
+		self.relu = nn.ReLU(inplace=True)
+		self.conv2 = conv3x3(planes, planes)
+		self.bn2 = norm_layer(planes)
+		self.downsample = downsample
+		self.stride = stride
+	def forward(self, x):
+		"""Return ``relu(bn2(conv2(relu(bn1(conv1(x))))) + shortcut)``."""
+		out = self.relu(self.bn1(self.conv1(x)))
+		out = self.bn2(self.conv2(out))
+		# Project the shortcut only when a downsample module was supplied.
+		shortcut = x if self.downsample is None else self.downsample(x)
+		out += shortcut
+		return self.relu(out)
+
+class Bottleneck(nn.Module):
+	"""Residual bottleneck: 1x1 reduce, 3x3, 1x1 expand (``expansion = 4``).
+
+	The 3x3 convolution carries the stride, groups and dilation; its width is
+	``int(planes * (base_width / 64.0)) * groups``.
+	"""
+	expansion = 4
+	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
+		super().__init__()
+		norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
+		out_planes = planes * self.expansion
+		# Channel width of the inner 3x3 convolution.
+		width = int(planes * (base_width / 64.0)) * groups
+		self.conv1 = conv1x1(inplanes, width)
+		self.bn1 = norm_layer(width)
+		self.conv2 = conv3x3(width, width, stride, groups, dilation)
+		self.bn2 = norm_layer(width)
+		self.conv3 = conv1x1(width, out_planes)
+		self.bn3 = norm_layer(out_planes)
+		self.relu = nn.ReLU(inplace=True)
+		self.downsample = downsample
+		self.stride = stride
+	def forward(self, x):
+		"""Return ``relu(residual(x) + shortcut)`` where the residual ends in ``bn3``."""
+		out = self.relu(self.bn1(self.conv1(x)))
+		out = self.relu(self.bn2(self.conv2(out)))
+		out = self.bn3(self.conv3(out))
+		# Project the shortcut only when a downsample module was supplied.
+		shortcut = x if self.downsample is None else self.downsample(x)
+		out += shortcut
+		return self.relu(out)
+
+class ResNet(nn.Module):
+	"""ResNet-style backbone that returns embeddings in eval mode and classifier outputs in training mode.
+
+	The network is a stem (7x7 convolution, norm, ReLU, max-pool), four
+	residual stages, global average pooling, an optional fully connected
+	embedding head and a linear classifier.
+
+	Args:
+		num_classes: output size of the final linear classifier.
+		loss: ``"softmax"`` or ``"triplet"``; selects what ``forward`` returns in training mode.
+		block: residual block class; must expose an ``expansion`` attribute.
+		layers: number of blocks in each of the four stages.
+		zero_init_residual: if true, zero the weight of the last norm layer of every ``Bottleneck``/``BasicBlock``.
+		groups: forwarded to every block.
+		width_per_group: forwarded to every block as ``base_width``.
+		replace_stride_with_dilation: three flags, one for each of stages 2-4; a true flag turns that stage's stride into dilation.
+		norm_layer: normalisation layer factory; defaults to ``nn.BatchNorm2d``.
+		last_stride: stride of the fourth stage.
+		fc_dims: hidden sizes of the optional embedding head, or ``None`` for no head.
+		dropout_p: dropout probability used inside the embedding head, or ``None``.
+		**kwargs: accepted but not used.
+
+	Raises:
+		ValueError: if ``replace_stride_with_dilation`` does not have exactly three elements.
+	"""
+	def __init__(self, num_classes, loss, block, layers, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None, last_stride=2, fc_dims=None, dropout_p=None, **kwargs):
+		super(ResNet, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		self._norm_layer = norm_layer
+		self.loss = loss
+		# Provisional value; _construct_fc_layer below overwrites it before the classifier is built.
+		self.feature_dim = 512 * block.expansion
+		# Running channel count and dilation, updated by _make_layer as stages are added.
+		self.inplanes = 64
+		self.dilation = 1
+		if replace_stride_with_dilation is None:
+			replace_stride_with_dilation = [False, False, False]
+		if len(replace_stride_with_dilation) != 3:
+			raise ValueError("replace_stride_with_dilation should be None or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+		self.groups = groups
+		self.base_width = width_per_group
+		# Stem.
+		self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
+		self.bn1 = norm_layer(self.inplanes)
+		self.relu = nn.ReLU(inplace=True)
+		self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+		# Four residual stages; only the last stage's stride is configurable.
+		self.layer1 = self._make_layer(block, 64, layers[0])
+		self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
+		self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
+		self.layer4 = self._make_layer(block, 512, layers[3], stride=last_stride, dilate=replace_stride_with_dilation[2])
+		self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
+		# ``self.fc`` is None when no embedding head is requested.
+		self.fc = self._construct_fc_layer(fc_dims, 512 * block.expansion, dropout_p)
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+		self._init_params()
+		# Applied after _init_params so the zeroed norm weights are not reset to 1.
+		if zero_init_residual:
+			for m in self.modules():
+				if isinstance(m, Bottleneck):
+					nn.init.constant_(m.bn3.weight, 0)
+				elif isinstance(m, BasicBlock):
+					nn.init.constant_(m.bn2.weight, 0)
+	def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+		"""Build one residual stage of ``blocks`` blocks and update ``self.inplanes``/``self.dilation``.
+
+		Args:
+			block: residual block class.
+			planes: base channel count of the stage (output is ``planes * block.expansion``).
+			blocks: number of blocks in the stage.
+			stride: stride of the first block.
+			dilate: if true, multiply ``self.dilation`` by ``stride`` and use stride 1 instead.
+
+		Returns:
+			An ``nn.Sequential`` containing the stage's blocks.
+		"""
+		norm_layer = self._norm_layer
+		downsample = None
+		# The first block uses the dilation in effect before this stage.
+		previous_dilation = self.dilation
+		if dilate:
+			self.dilation *= stride
+			stride = 1
+		# A 1x1 projection is needed on the shortcut whenever the first block changes resolution or channel count.
+		if stride != 1 or self.inplanes != planes * block.expansion:
+			downsample = nn.Sequential(
+				conv1x1(self.inplanes, planes * block.expansion, stride),
+				norm_layer(planes * block.expansion),
+			)
+		layers = []
+		layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer))
+		self.inplanes = planes * block.expansion
+		# Remaining blocks use default stride, no downsample, and the updated dilation.
+		for _ in range(1, blocks):
+			layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer))
+		return nn.Sequential(*layers)
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+		"""Build the optional embedding head and set ``self.feature_dim`` accordingly.
+
+		Args:
+			fc_dims: list/tuple of hidden sizes, or ``None`` for no head.
+			input_dim: size of the pooled feature vector fed to the head.
+			dropout_p: dropout probability appended after each ReLU, or ``None`` for no dropout.
+
+		Returns:
+			An ``nn.Sequential`` of Linear/BatchNorm1d/ReLU(/Dropout) groups, or ``None`` when ``fc_dims`` is ``None``.
+		"""
+		if fc_dims is None:
+			self.feature_dim = input_dim
+			return None
+		assert isinstance(fc_dims, (list, tuple)), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU(inplace=True))
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+		self.feature_dim = fc_dims[-1]
+		return nn.Sequential(*layers)
+	def _init_params(self):
+		"""Initialise every Conv2d, BatchNorm2d, BatchNorm1d and Linear submodule in place."""
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+	def featuremaps(self, x):
+		"""Run the stem and the four residual stages and return the resulting feature map."""
+		x = self.conv1(x)
+		x = self.bn1(x)
+		x = self.relu(x)
+		x = self.maxpool(x)
+		x = self.layer1(x)
+		x = self.layer2(x)
+		x = self.layer3(x)
+		x = self.layer4(x)
+		return x
+	def forward(self, x):
+		"""Compute embeddings (eval mode) or classifier outputs (training mode).
+
+		Returns:
+			In eval mode, the pooled (and, if configured, fc-projected) feature vector ``v``.
+			In training mode, the classifier output ``y`` for ``loss == "softmax"`` or the
+			pair ``(y, v)`` for ``loss == "triplet"``.
+
+		Raises:
+			KeyError: in training mode, if ``self.loss`` is neither ``"softmax"`` nor ``"triplet"``.
+		"""
+		f = self.featuremaps(x)
+		v = self.global_avgpool(f)
+		# Flatten the pooled map to one vector per sample.
+		v = v.view(v.size(0), -1)
+		if self.fc is not None:
+			v = self.fc(v)
+		if not self.training:
+			return v
+		y = self.classifier(v)
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
+
+def init_pretrained_weights(model, model_url):
+	pretrain_dict = model_zoo.load_url(model_url)
+	model_dict = model.state_dict()
+	pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
+	model_dict.update(pretrain_dict)
+	model.load_state_dict(model_dict)
+
+def resnet18(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=BasicBlock, layers=[2, 2, 2, 2], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet18"])
+	return model
+
+def resnet34(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=BasicBlock, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet34"])
+	return model
+
+def resnet50(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet50"])
+	return model
+
+def resnet101(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 23, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet101"])
+	return model
+
+def resnet152(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 8, 36, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet152"])
+	return model
+
+def resnext50_32x4d(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, groups=32, width_per_group=4, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnext50_32x4d"])
+	return model
+
+def resnext101_32x8d(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 23, 3], last_stride=2, fc_dims=None, dropout_p=None, groups=32, width_per_group=8, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnext101_32x8d"])
+	return model
+
+def resnet50_fc512(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=1, fc_dims=[512], dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet50"])
+	return model
diff --git a/ethology/reid/backends/__init__.py b/ethology/reid/backends/__init__.py
new file mode 100644
index 00000000..f032d396
--- /dev/null
+++ b/ethology/reid/backends/__init__.py
@@ -0,0 +1 @@
+# Backends for ReID inference
diff --git a/ethology/reid/backends/base_backend.py b/ethology/reid/backends/base_backend.py
new file mode 100644
index 00000000..688ec43a
--- /dev/null
+++ b/ethology/reid/backends/base_backend.py
@@ -0,0 +1,170 @@
+
+import os
+from abc import abstractmethod
+from pathlib import Path
+import cv2
+import gdown
+import numpy as np
+import torch
+from filelock import SoftFileLock
+from ethology.reid.core.registry import ReIDModelRegistry
+# from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
+# from ethology.utils.checks import RequirementsChecker  # If needed, implement or set RequirementsChecker
+
+class BaseModelBackend:
+
+    def __init__(self, weights, device, half):
+        self.weights = weights[0] if isinstance(weights, list) else weights
+        if isinstance(self.weights, str):
+            self.weights = Path(self.weights)
+        # LOGGER.info(self.weights)
+        self.device = device
+        self.half = half
+        self.model = None
+        # Support both string and torch.device for device
+        if hasattr(self.device, 'type'):
+            self.cuda = torch.cuda.is_available() and self.device.type != "cpu"
+        else:
+            self.cuda = torch.cuda.is_available() and self.device != "cpu"
+
+        self.download_model(self.weights)
+        self.model_name = ReIDModelRegistry.get_model_name(self.weights)
+
+        self.model = ReIDModelRegistry.build_model(
+            self.model_name,
+            self.weights,
+            num_classes=ReIDModelRegistry.get_nr_classes(self.weights),
+            pretrained=not (self.weights and self.weights.is_file()),
+            use_gpu=device,
+        )
+        # self.checker = RequirementsChecker()
+
+        self.load_model(self.weights)
+
+        self.mean_array = torch.tensor([0.485, 0.456, 0.406], device=self.device).view(1, 3, 1, 1)
+        self.std_array = torch.tensor([0.229, 0.224, 0.225], device=self.device).view(1, 3, 1, 1)
+        if "clip" in self.model_name:
+            self.mean_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
+            self.std_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
+
+        if "vehicleid" in self.weights.name or "veri" in self.weights.name:
+            input_shape = (256, 256)
+        elif "lmbn" in self.model_name:
+            input_shape = (384, 128)
+        elif "hacnn" in self.model_name:
+            input_shape = (160, 64)
+        else:
+            input_shape = (256, 128)
+        self.input_shape = input_shape
+
+
+    def get_crops(self, xyxys, img):
+        h, w = img.shape[:2]
+        interpolation_method = cv2.INTER_LINEAR
+        num_crops = len(xyxys)
+        crops = torch.empty(
+            (num_crops, 3, *self.input_shape),
+            dtype=torch.half if self.half else torch.float,
+            device=self.device,
+        )
+        for i, box in enumerate(xyxys):
+            x1, y1, x2, y2 = box.round().astype("int")
+            x1, y1, x2, y2 = max(0, x1), max(0, y1), min(w, x2), min(h, y2)
+            crop = img[y1:y2, x1:x2]
+            crop = cv2.resize(
+                crop,
+                (self.input_shape[1], self.input_shape[0]),
+                interpolation=interpolation_method,
+            )
+            crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
+            crop = torch.from_numpy(crop).to(
+                self.device, dtype=torch.half if self.half else torch.float
+            )
+            crops[i] = torch.permute(crop, (2, 0, 1))
+        crops = crops / 255.0
+        crops = (crops - self.mean_array) / self.std_array
+        return crops
+
+
+    @torch.no_grad()
+    def get_features(self, xyxys, img):
+        if xyxys.size != 0:
+            crops = self.get_crops(xyxys, img)
+            crops = self.inference_preprocess(crops)
+            features = self.forward(crops)
+            features = self.inference_postprocess(features)
+        else:
+            features = np.array([])
+        features = features / np.linalg.norm(features, axis=-1, keepdims=True)
+        return features
+
+
+    def warmup(self, imgsz=[(256, 128, 3)]):
+        if self.device.type != "cpu":
+            im = np.random.randint(0, 255, *imgsz, dtype=np.uint8)
+            crops = self.get_crops(
+                xyxys=np.array([[0, 0, 64, 64], [0, 0, 128, 128]]), img=im
+            )
+            crops = self.inference_preprocess(crops)
+            self.forward(crops)
+
+
+    def to_numpy(self, x):
+        return x.cpu().numpy() if isinstance(x, torch.Tensor) else x
+
+
+    def inference_preprocess(self, x):
+        if self.half:
+            if isinstance(x, torch.Tensor):
+                if x.dtype != torch.float16:
+                    x = x.half()
+            elif isinstance(x, np.ndarray):
+                if x.dtype != np.float16:
+                    x = x.astype(np.float16)
+        if hasattr(self, 'nhwc') and self.nhwc:
+            if isinstance(x, torch.Tensor):
+                x = x.permute(0, 2, 3, 1)
+            elif isinstance(x, np.ndarray):
+                x = np.transpose(x, (0, 2, 3, 1))
+        return x
+
+
+    def inference_postprocess(self, features):
+        if isinstance(features, (list, tuple)):
+            return (
+                self.to_numpy(features[0]) if len(features) == 1 else [self.to_numpy(x) for x in features]
+            )
+        else:
+            return self.to_numpy(features)
+
+
+    @abstractmethod
+    def forward(self, im_batch):
+        raise NotImplementedError("This method should be implemented by subclasses.")
+
+
+    @abstractmethod
+    def load_model(self, w):
+        raise NotImplementedError("This method should be implemented by subclasses.")
+
+
+    def download_model(self, w):
+        if isinstance(w, str):
+            w = Path(w)
+        if w.suffix != ".pt":
+            return
+        model_url = ReIDModelRegistry.get_model_url(w)
+        lock = SoftFileLock(str(w) + ".lock", timeout=300)
+        with lock:
+            if w.exists() or "openvino" in w.name:
+                # LOGGER.info(f"[PID {os.getpid()}] Found existing ReID weights at {w}; skipping download.")
+                return
+            if model_url:
+                # LOGGER.info(f"[PID {os.getpid()}] Downloading ReID weights from {model_url} → {w}")
+                gdown.download(model_url, str(w), quiet=False)
+            else:
+                # LOGGER.error(
+                #     f"No URL associated with the chosen ReID weights ({w}).\n"
+                #     f"Choose one of the following:"
+                # )
+                ReIDModelRegistry.show_downloadable_models()
diff --git a/ethology/reid/backends/onnx_backend.py b/ethology/reid/backends/onnx_backend.py
new file mode 100644
index 00000000..c7c93017
--- /dev/null
+++ b/ethology/reid/backends/onnx_backend.py
@@ -0,0 +1,31 @@
+
+from ethology.reid.backends.base_backend import BaseModelBackend
+
+class ONNXBackend(BaseModelBackend):
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		# ONNXRuntime will attempt to use the first provider, and if it fails or is not
+		# available for some reason, it will fall back to the next provider in the list
+		if self.device.type == "mps":
+			# self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
+			providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
+		elif self.device.type == "cuda":
+			# self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
+			providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+		else:
+			# self.checker.check_packages(("onnxruntime==1.18.1",))
+			providers = ["CPUExecutionProvider"]
+		import onnxruntime
+		self.session = onnxruntime.InferenceSession(str(w), providers=providers)
+
+	def forward(self, im_batch):
+		im_batch = im_batch.cpu().numpy()
+		features = self.session.run(
+			[self.session.get_outputs()[0].name],
+			{self.session.get_inputs()[0].name: im_batch},
+		)[0]
+		return features
diff --git a/ethology/reid/backends/openvino_backend.py b/ethology/reid/backends/openvino_backend.py
new file mode 100644
index 00000000..f06392bf
--- /dev/null
+++ b/ethology/reid/backends/openvino_backend.py
@@ -0,0 +1,48 @@
+from pathlib import Path
+
+from ethology.reid.backends.base_backend import BaseModelBackend
+# Note: LOGGER can be replaced with print or a local logger if needed
+
+class OpenVinoBackend(BaseModelBackend):
+
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		# self.checker.check_packages(("openvino>=2025.2.0",))
+
+		print(f"Loading {w} for OpenVINO inference...")
+		try:
+			# requires openvino-dev: https://pypi.org/project/openvino-dev/
+			from openvino import Core, Layout
+		except ImportError:
+			print(
+				f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
+				"requires openvino pip package to be installed!\n"
+				"$ pip install openvino>=2025.2.0\n"
+			)
+			raise
+		ie = Core()
+		w = Path(w)
+		print(w)
+		if w.suffix == '.bin':
+			w = w.with_suffix('.xml')
+
+		if not w.is_file():  # if not *.xml
+			w = next(
+				Path(w).glob("*.xml")
+			)  # get *.xml file from *_openvino_model dir
+		network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
+		if network.get_parameters()[0].get_layout().empty:
+			network.get_parameters()[0].set_layout(Layout("NCWH"))
+		self.executable_network = ie.compile_model(
+			network, device_name="CPU"
+		)  # device_name="MYRIAD" for Intel NCS2
+		self.output_layer = next(iter(self.executable_network.outputs))
+
+	def forward(self, im_batch):
+		im_batch = im_batch.cpu().numpy()  # FP32
+		features = self.executable_network([im_batch])[self.output_layer]
+		return features
diff --git a/ethology/reid/backends/pytorch_backend.py b/ethology/reid/backends/pytorch_backend.py
new file mode 100644
index 00000000..d3dbfa06
--- /dev/null
+++ b/ethology/reid/backends/pytorch_backend.py
@@ -0,0 +1,20 @@
+from ethology.reid.backends.base_backend import BaseModelBackend
+from ethology.reid.core.registry import ReIDModelRegistry
+
+class PyTorchBackend(BaseModelBackend):
+
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		# Load a PyTorch model
+		if w and w.is_file():
+			ReIDModelRegistry.load_pretrained_weights(self.model, w)
+		self.model.to(self.device).eval()
+		self.model.half() if self.half else self.model.float()
+
+	def forward(self, im_batch):
+		features = self.model(im_batch)
+		return features
diff --git a/ethology/reid/backends/tensorrt_backend.py b/ethology/reid/backends/tensorrt_backend.py
new file mode 100644
index 00000000..8dd7d7ee
--- /dev/null
+++ b/ethology/reid/backends/tensorrt_backend.py
@@ -0,0 +1,310 @@
+from collections import OrderedDict, namedtuple
+
+import numpy as np
+import torch
+
+from ethology.reid.backends.base_backend import BaseModelBackend
+# Note: LOGGER can be replaced with print or a local logger if needed
+
+import os
+import sys
+import torch
+import numpy as np
+from collections import namedtuple, OrderedDict
+
+
+
+# Record describing one engine I/O tensor: its name, NumPy dtype, shape, the
+# torch tensor backing it and that tensor's device pointer.
+# NOTE: ``TensorRTBackend.load_model`` builds an identical namedtuple locally.
+Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
+
+
+class TensorRTBackend(BaseModelBackend):
+	def __init__(self, engine_path, device=None):
+		import hashlib
+		import requests
+		self.device = device or (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
+		self.fp16 = False
+		self.model_ = None
+		self.context = None
+		self.bindings = None
+		self.binding_addrs = None
+		self.is_trt10 = False
+		# Download engine if engine_path is a URL
+		if engine_path.startswith("http://") or engine_path.startswith("https://"):
+			# Use a hash of the URL for filename
+			engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
+			filename = f"trt_engine_{engine_hash}.engine"
+			cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
+			os.makedirs(cache_dir, exist_ok=True)
+			cached_file = os.path.join(cache_dir, filename)
+			if not os.path.exists(cached_file):
+				print(f"[TensorRT] Downloading engine from {engine_path} to {cached_file}")
+				with requests.get(engine_path, stream=True) as r:
+					r.raise_for_status()
+					with open(cached_file, 'wb') as f:
+						for chunk in r.iter_content(chunk_size=8192):
+							f.write(chunk)
+			else:
+				print(f"[TensorRT] Using cached engine at {cached_file}")
+			self.engine_path = cached_file
+		else:
+			self.engine_path = engine_path
+		self.load_model(self.engine_path)
+
+	def load_model(self, w):
+		print(f"Loading {w} for TensorRT inference...")
+		try:
+			import tensorrt as trt
+			import pycuda.driver as cuda
+			import pycuda.autoinit  # noqa: F401
+		except ImportError:
+			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH.")
+
+		if self.device.type == "cpu":
+			if torch.cuda.is_available():
+				self.device = torch.device("cuda:0")
+			else:
+				raise ValueError("CUDA device not available for TensorRT inference.")
+
+		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
+		logger = trt.Logger(trt.Logger.INFO)
+
+		# Deserialize the engine
+		with open(w, "rb") as f:
+			with trt.Runtime(logger) as runtime:
+				self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+		# Execution context
+		self.context = self.model_.create_execution_context()
+		self.bindings = OrderedDict()
+
+		self.is_trt10 = not hasattr(self.model_, "num_bindings")
+		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
+
+		# Parse bindings
+		for index in num:
+			if self.is_trt10:
+				name = self.model_.get_tensor_name(index)
+				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
+					self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_tensor_shape(name))
+
+			else:
+				name = self.model_.get_binding_name(index)
+				dtype = trt.nptype(self.model_.get_binding_dtype(index))
+				is_input = self.model_.binding_is_input(index)
+
+				# Handle dynamic shapes
+				if is_input and -1 in self.model_.get_binding_shape(index):
+					profile_index = 0
+					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
+					self.context.set_binding_shape(index, opt_shape)
+
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_binding_shape(index))
+			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
+			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+
+		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
+
+		# Execution context
+		self.context = self.model_.create_execution_context()
+		self.bindings = OrderedDict()
+
+		self.is_trt10 = not hasattr(self.model_, "num_bindings")
+		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
+
+		# Parse bindings
+		for index in num:
+			if self.is_trt10:
+				name = self.model_.get_tensor_name(index)
+				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
+						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_tensor_shape(name))
+
+			else:
+				name = self.model_.get_binding_name(index)
+				dtype = trt.nptype(self.model_.get_binding_dtype(index))
+				is_input = self.model_.binding_is_input(index)
+
+				# Handle dynamic shapes
+				if is_input and -1 in self.model_.get_binding_shape(index):
+					profile_index = 0
+					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
+					self.context.set_binding_shape(index, opt_shape)
+
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_binding_shape(index))
+			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
+			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+
+		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
+
+	def forward(self, im_batch):
+		temp_im_batch = im_batch.clone()
+		batch_array = []
+		inp_batch = im_batch.shape[0]
+		out_batch = self.bindings["output"].shape[0]
+		resultant_features = []
+
+		# Divide batch to sub batches
+		while inp_batch > out_batch:
+			batch_array.append(temp_im_batch[:out_batch])
+			temp_im_batch = temp_im_batch[out_batch:]
+			inp_batch = temp_im_batch.shape[0]
+		if temp_im_batch.shape[0] > 0:
+			batch_array.append(temp_im_batch)
+
+		for temp_batch in batch_array:
+			# Adjust for dynamic shapes
+			if temp_batch.shape != self.bindings["images"].shape:
+				if self.is_trt10:
+					self.context.set_input_shape("images", temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
+				else:
+					i_in = self.model_.get_binding_index("images")
+					i_out = self.model_.get_binding_index("output")
+					self.context.set_binding_shape(i_in, temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					output_shape = tuple(self.context.get_binding_shape(i_out))
+					self.bindings["output"].data.resize_(output_shape)
+
+			s = self.bindings["images"].shape
+			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
+
+			self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+			# Execute inference
+			self.context.execute_v2(list(self.binding_addrs.values()))
+			features = self.bindings["output"].data
+			resultant_features.append(features.clone())
+
+		if len(resultant_features) == 1:
+			return resultant_features[0]
+		else:
+			rslt_features = torch.cat(resultant_features, dim=0)
+			rslt_features = rslt_features[: im_batch.shape[0]]
+			return rslt_features
+
+	def load_model(self, w):
+		print(f"Loading {w} for TensorRT inference...")
+		# self.checker.check_packages(("nvidia-tensorrt",))
+		try:
+			import tensorrt as trt  # TensorRT library
+		except ImportError:
+			raise ImportError("Please install tensorrt to use this backend.")
+
+		if self.device.type == "cpu":
+			if torch.cuda.is_available():
+				self.device = torch.device("cuda:0")
+			else:
+				raise ValueError("CUDA device not available for TensorRT inference.")
+
+		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
+		logger = trt.Logger(trt.Logger.INFO)
+
+		# Deserialize the engine
+		with open(w, "rb") as f:
+			with trt.Runtime(logger) as runtime:
+				self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+		# Execution context
+		self.context = self.model_.create_execution_context()
+		self.bindings = OrderedDict()
+
+		self.is_trt10 = not hasattr(self.model_, "num_bindings")
+		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
+
+		# Parse bindings
+		for index in num:
+			if self.is_trt10:
+				name = self.model_.get_tensor_name(index)
+				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
+						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_tensor_shape(name))
+
+			else:
+				name = self.model_.get_binding_name(index)
+				dtype = trt.nptype(self.model_.get_binding_dtype(index))
+				is_input = self.model_.binding_is_input(index)
+
+				# Handle dynamic shapes
+				if is_input and -1 in self.model_.get_binding_shape(index):
+					profile_index = 0
+					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
+					self.context.set_binding_shape(index, opt_shape)
+
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_binding_shape(index))
+			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
+			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+
+		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
+
+	def forward(self, im_batch):
+		temp_im_batch = im_batch.clone()
+		batch_array = []
+		inp_batch = im_batch.shape[0]
+		out_batch = self.bindings["output"].shape[0]
+		resultant_features = []
+
+		# Divide batch to sub batches
+		while inp_batch > out_batch:
+			batch_array.append(temp_im_batch[:out_batch])
+			temp_im_batch = temp_im_batch[out_batch:]
+			inp_batch = temp_im_batch.shape[0]
+		if temp_im_batch.shape[0] > 0:
+			batch_array.append(temp_im_batch)
+
+		for temp_batch in batch_array:
+			# Adjust for dynamic shapes
+			if temp_batch.shape != self.bindings["images"].shape:
+				if self.is_trt10:
+					self.context.set_input_shape("images", temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
+				else:
+					i_in = self.model_.get_binding_index("images")
+					i_out = self.model_.get_binding_index("output")
+					self.context.set_binding_shape(i_in, temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					output_shape = tuple(self.context.get_binding_shape(i_out))
+					self.bindings["output"].data.resize_(output_shape)
+
+			s = self.bindings["images"].shape
+			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
+
+			self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+			# Execute inference
+			self.context.execute_v2(list(self.binding_addrs.values()))
+			features = self.bindings["output"].data
+			resultant_features.append(features.clone())
+
+		if len(resultant_features) == 1:
+			return resultant_features[0]
+		else:
+			rslt_features = torch.cat(resultant_features, dim=0)
+			rslt_features = rslt_features[: im_batch.shape[0]]
+			return rslt_features
diff --git a/ethology/reid/backends/tflite_backend.py b/ethology/reid/backends/tflite_backend.py
new file mode 100644
index 00000000..b0a7b707
--- /dev/null
+++ b/ethology/reid/backends/tflite_backend.py
@@ -0,0 +1,40 @@
+from pathlib import Path
+
+import numpy as np
+import torch
+
+from ethology.reid.backends.base_backend import BaseModelBackend
+# Note: LOGGER can be replaced with print or a local logger if needed
+
+class TFLiteBackend(BaseModelBackend):
+	"""
+	A class to handle TensorFlow Lite model inference with dynamic batch size support.
+	"""
+	def __init__(self, weights: Path, device: str, half: bool):
+		super().__init__(weights, device, half)
+		self.nhwc = True
+		self.half = False
+
+	def load_model(self, w):
+		# self.checker.check_packages(("tensorflow",))
+		print(f"Loading {str(w)} for TensorFlow Lite inference...")
+		import tensorflow as tf
+		self.interpreter = tf.lite.Interpreter(model_path=str(w))
+		self.interpreter.allocate_tensors()
+		self.input_details = self.interpreter.get_input_details()
+		self.output_details = self.interpreter.get_output_details()
+		self.current_allocated_batch_size = self.input_details[0]["shape"][0]
+
+	def forward(self, im_batch: torch.Tensor) -> np.ndarray:
+		im_batch = im_batch.cpu().numpy()
+		batch_size = im_batch.shape[0]
+		if batch_size != self.current_allocated_batch_size:
+			self.interpreter.resize_tensor_input(
+				self.input_details[0]["index"], [batch_size, 256, 128, 3]
+			)
+			self.interpreter.allocate_tensors()
+			self.current_allocated_batch_size = batch_size
+		self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
+		self.interpreter.invoke()
+		features = self.interpreter.get_tensor(self.output_details[0]["index"])
+		return features
diff --git a/ethology/reid/backends/torchscript_backend.py b/ethology/reid/backends/torchscript_backend.py
new file mode 100644
index 00000000..b6602171
--- /dev/null
+++ b/ethology/reid/backends/torchscript_backend.py
@@ -0,0 +1,20 @@
+import torch
+
+from ethology.reid.backends.base_backend import BaseModelBackend
+# Note: LOGGER can be replaced with print or a local logger if needed
+
+class TorchscriptBackend(BaseModelBackend):
+
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		print(f"Loading {w} for TorchScript inference...")
+		self.model = torch.jit.load(w)
+		self.model.half() if self.half else self.model.float()
+
+	def forward(self, im_batch):
+		features = self.model(im_batch)
+		return features
diff --git a/ethology/reid/core/__init__.py b/ethology/reid/core/__init__.py
new file mode 100644
index 00000000..9dab06ec
--- /dev/null
+++ b/ethology/reid/core/__init__.py
@@ -0,0 +1 @@
+# Core logic for ReID
diff --git a/ethology/reid/core/auto_backend.py b/ethology/reid/core/auto_backend.py
new file mode 100644
index 00000000..6f43eba2
--- /dev/null
+++ b/ethology/reid/core/auto_backend.py
@@ -0,0 +1,74 @@
+
+from pathlib import Path
+from typing import Tuple, Union
+import torch
+from ethology.reid.backends.onnx_backend import ONNXBackend
+from ethology.reid.backends.openvino_backend import OpenVinoBackend
+from ethology.reid.backends.pytorch_backend import PyTorchBackend
+try:
+	from ethology.reid.backends.tensorrt_backend import TensorRTBackend
+except ImportError:
+	class TensorRTBackend:
+		def __init__(self, *args, **kwargs):
+			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH.")
+from ethology.reid.backends.tflite_backend import TFLiteBackend
+from ethology.reid.backends.torchscript_backend import TorchscriptBackend
+# from ethology.reid.core import export_formats  # If needed, implement or copy export_formats
+# from ethology.utils import WEIGHTS  # If needed, implement or set WEIGHTS
+# from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
+# from ethology.utils.torch_utils import select_device  # If needed, implement or set select_device
+
+class ReidAutoBackend:
+	def __init__(
+		self,
+		weights: Path,
+		device: torch.device = torch.device("cpu"),
+		half: bool = False,
+	):
+		super().__init__()
+		w = weights[0] if isinstance(weights, list) else weights
+		(
+			self.pt,
+			self.pth,
+			self.jit,
+			self.onnx,
+			self.xml,
+			self.engine,
+			self.tflite,
+		) = self.model_type(w)
+		self.weights = weights
+		self.device = device  # For simplicity, skip select_device for now
+		self.half = half
+		self.model = self.get_backend()
+
+	def get_backend(self):
+		backend_map = {
+			self.pt or self.pth: PyTorchBackend,
+			self.jit: TorchscriptBackend,
+			self.onnx: ONNXBackend,
+			self.engine: TensorRTBackend,
+			self.xml: OpenVinoBackend,
+			self.tflite: TFLiteBackend,
+		}
+		for condition, backend_class in backend_map.items():
+			if condition:
+				return backend_class(self.weights, self.device, self.half)
+		raise RuntimeError("This model framework is not supported yet!")
+
+	def check_suffix(self, file: Path = "osnet_x0_25_msmt17.pt", suffix: Union[str, Tuple[str, ...]] = (".pt",), msg: str = ""):
+		suffix = [suffix] if isinstance(suffix, str) else list(suffix)
+		files = [file] if isinstance(file, (str, Path)) else list(file)
+		for f in files:
+			file_suffix = Path(f).suffix.lower()
+			if file_suffix and file_suffix not in suffix:
+				print(f"File {f} does not have an acceptable suffix. Expected: {suffix}")
+
+	def model_type(self, p: Path) -> Tuple[bool, ...]:
+		# For demo, just check for .pt
+		sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
+		self.check_suffix(p, sf)
+		types = [str(Path(p)).endswith(s) for s in sf]
+		# OpenVINO explicit check
+		if Path(p).suffix in ['.xml', '.bin']:
+			types[3] = True
+		return tuple(types)
diff --git a/ethology/reid/core/config.py b/ethology/reid/core/config.py
new file mode 100644
index 00000000..926c0cc9
--- /dev/null
+++ b/ethology/reid/core/config.py
@@ -0,0 +1,16 @@
+MODEL_TYPES = [
+	"resnet50",
+	"resnet101",
+	"mlfn",
+	"hacnn",
+	"mobilenetv2_x1_0",
+	"mobilenetv2_x1_4",
+	"osnet_x1_0",
+	"osnet_x0_75",
+	"osnet_x0_5",
+	"osnet_x0_25",
+	"osnet_ibn_x1_0",
+	"osnet_ain_x1_0",
+	"lmbn_n",
+	"clip",
+]
diff --git a/ethology/reid/core/factory.py b/ethology/reid/core/factory.py
new file mode 100644
index 00000000..bc8b6ab1
--- /dev/null
+++ b/ethology/reid/core/factory.py
@@ -0,0 +1,30 @@
+
+# Import model constructors from ethology's local backbones
+from ethology.reid.backbones.hacnn import HACNN
+from ethology.reid.backbones.mlfn import mlfn
+from ethology.reid.backbones.mobilenetv2 import mobilenetv2_x1_0, mobilenetv2_x1_4
+from ethology.reid.backbones.osnet import osnet_ibn_x1_0, osnet_x0_5, osnet_x0_25, osnet_x0_75, osnet_x1_0
+from ethology.reid.backbones.osnet_ain import osnet_ain_x0_5, osnet_ain_x0_25, osnet_ain_x0_75, osnet_ain_x1_0
+from ethology.reid.backbones.resnet import resnet50, resnet101
+# from ethology.reid.backbones.lmbn.lmbn_n import LMBN_n  # If present
+# from ethology.reid.backbones.clip.make_model import make_model  # If present
+
+MODEL_FACTORY = {
+	"resnet50": resnet50,
+	"resnet101": resnet101,
+	"mobilenetv2_x1_0": mobilenetv2_x1_0,
+	"mobilenetv2_x1_4": mobilenetv2_x1_4,
+	"hacnn": HACNN,
+	"mlfn": mlfn,
+	"osnet_x1_0": osnet_x1_0,
+	"osnet_x0_75": osnet_x0_75,
+	"osnet_x0_5": osnet_x0_5,
+	"osnet_x0_25": osnet_x0_25,
+	"osnet_ibn_x1_0": osnet_ibn_x1_0,
+	"osnet_ain_x1_0": osnet_ain_x1_0,
+	"osnet_ain_x0_75": osnet_ain_x0_75,
+	"osnet_ain_x0_5": osnet_ain_x0_5,
+	"osnet_ain_x0_25": osnet_ain_x0_25,
+	# "lmbn_n": LMBN_n,  # Uncomment if implemented
+	# "clip": make_model,  # Uncomment if implemented
+}
diff --git a/ethology/reid/core/handler.py b/ethology/reid/core/handler.py
new file mode 100644
index 00000000..b5e51391
--- /dev/null
+++ b/ethology/reid/core/handler.py
@@ -0,0 +1,33 @@
+# Main handler for ReID in ethology
+
+# Thin wrapper to use BoxMOT ReID models in ethology
+from pathlib import Path
+from typing import Union
+import numpy as np
+
+
+# Import ethology's local ReID handler
+from ethology.reid.core.reid_handler import ReID as EthologyReID
+
+class ReIDHandler:
+    """
+    Ethology ReID handler using local models and backends.
+    """
+    def __init__(self, weights: Union[str, Path], device='cpu', half=False):
+        self.model = EthologyReID(weights=weights, device=device, half=half)
+
+    def extract_features(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
+        """
+        Extract feature embeddings for detections in a frame.
+        Parameters
+        ----------
+        frame : np.ndarray
+            (H, W, C) BGR image.
+        dets : np.ndarray
+            (N, 6) array of detections (x1, y1, x2, y2, conf, cls).
+        Returns
+        -------
+        np.ndarray
+            (N, D) feature embeddings.
+        """
+        return self.model(frame, dets)
diff --git a/ethology/reid/core/registry.py b/ethology/reid/core/registry.py
new file mode 100644
index 00000000..333cff2f
--- /dev/null
+++ b/ethology/reid/core/registry.py
@@ -0,0 +1,71 @@
+
+from collections import OrderedDict
+import torch
+from ethology.reid.core.config import MODEL_TYPES  #, NR_CLASSES_DICT, TRAINED_URLS
+from ethology.reid.core.factory import MODEL_FACTORY
+# from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
+
+class ReIDModelRegistry:
+	"""Encapsulates model registration and related utilities."""
+
+	@staticmethod
+	def show_downloadable_models():
+		# LOGGER.info("Available .pt ReID models for automatic download")
+		# LOGGER.info(list(TRAINED_URLS.keys()))
+		pass
+
+	@staticmethod
+	def get_model_name(model):
+		for name in MODEL_TYPES:
+			if name in model.name:
+				return name
+		return None
+
+	@staticmethod
+	def get_model_url(model):
+		# return TRAINED_URLS.get(model.name, None)
+		return None
+
+	@staticmethod
+	def load_pretrained_weights(model, weight_path):
+		device = "cpu" if not torch.cuda.is_available() else None
+		checkpoint = torch.load(
+			weight_path,
+			map_location=torch.device("cpu") if device == "cpu" else None,
+			weights_only=False,
+			encoding='latin1',
+		)
+		state_dict = checkpoint.get("state_dict", checkpoint)
+		model_dict = model.state_dict()
+		new_state_dict = OrderedDict()
+		matched_layers, discarded_layers = [], []
+		for k, v in state_dict.items():
+			key = k[7:] if k.startswith("module.") else k
+			if key in model_dict and model_dict[key].size() == v.size():
+				new_state_dict[key] = v
+				matched_layers.append(key)
+			else:
+				discarded_layers.append(key)
+		model_dict.update(new_state_dict)
+		model.load_state_dict(model_dict)
+
+	@staticmethod
+	def show_available_models():
+		# LOGGER.info("Available models:")
+		# LOGGER.info(list(MODEL_FACTORY.keys()))
+		pass
+
+	@staticmethod
+	def get_nr_classes(weights):
+		# dataset_key = weights.name.split("_")[1]
+		# return NR_CLASSES_DICT.get(dataset_key, 1)
+		return 1
+
+	@staticmethod
+	def build_model(name, weights, num_classes, loss="softmax", pretrained=True, use_gpu=True):
+		if name not in MODEL_FACTORY:
+			available = list(MODEL_FACTORY.keys())
+			raise KeyError(f"Unknown model '{name}'. Must be one of {available}")
+		return MODEL_FACTORY[name](
+			num_classes=num_classes, loss=loss, pretrained=pretrained, use_gpu=use_gpu
+		)
diff --git a/ethology/reid/core/reid_handler.py b/ethology/reid/core/reid_handler.py
new file mode 100644
index 00000000..2c72658a
--- /dev/null
+++ b/ethology/reid/core/reid_handler.py
@@ -0,0 +1,28 @@
+
+from pathlib import Path
+from typing import Union
+import numpy as np
+from ethology.reid.core.auto_backend import ReidAutoBackend
+
+class ReID:
+	def __init__(self, weights: Union[str, Path], device='cpu', half=False):
+		self.weights = Path(weights)
+		self.device = device
+		self.half = half
+		self.backend = ReidAutoBackend(weights=self.weights, device=device, half=half)
+		self.model = self.backend.model
+
+	def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
+		"""
+		Extract features for detections in a frame.
+		Args:
+			frame: (H, W, C) BGR image
+			dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
+		Returns:
+			embs: (N, D) embeddings.
+		"""
+		if dets.shape[0] == 0:
+			return np.empty((0, 0))
+		xyxy = dets[:, :4]
+		embs = self.model.get_features(xyxy, frame)
+		return embs
diff --git a/tests/test_unit/test_reid_handler.py b/tests/test_unit/test_reid_handler.py
new file mode 100644
index 00000000..3a5146cf
--- /dev/null
+++ b/tests/test_unit/test_reid_handler.py
@@ -0,0 +1,12 @@
+import numpy as np
+from ethology.reid.core.handler import ReIDHandler
+
+def test_extract_features_shape():
+    handler = ReIDHandler(weights='osnet_x0_25_imagenet.pth')
+    frame = np.random.randint(0, 255, (128, 64, 3), dtype=np.uint8)
+    dets = np.array([
+        [10, 10, 50, 100, 0.9, 1],
+        [60, 20, 100, 110, 0.8, 2],
+    ])
+    feats = handler.extract_features(frame, dets)
+    assert feats.shape[0] == dets.shape[0]

From dd0c1bd106d1696aafe88da17f5a439c5bf78021 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Feb 2026 13:41:00 +0000
Subject: [PATCH 06/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/reid/backbones/hacnn.py              | 561 ++++++------
 ethology/reid/backbones/mlfn.py               | 463 +++++-----
 ethology/reid/backbones/mobilenetv2.py        | 456 +++++-----
 ethology/reid/backbones/osnet.py              | 800 ++++++++++-------
 ethology/reid/backbones/osnet_ain.py          | 831 +++++++++++-------
 ethology/reid/backbones/resnet.py             | 652 +++++++++-----
 ethology/reid/backends/base_backend.py        |  48 +-
 ethology/reid/backends/onnx_backend.py        |  55 +-
 ethology/reid/backends/openvino_backend.py    |  83 +-
 ethology/reid/backends/pytorch_backend.py     |  28 +-
 ethology/reid/backends/tensorrt_backend.py    | 688 ++++++++-------
 ethology/reid/backends/tflite_backend.py      |  64 +-
 ethology/reid/backends/torchscript_backend.py |  25 +-
 ethology/reid/core/auto_backend.py            | 127 +--
 ethology/reid/core/config.py                  |  28 +-
 ethology/reid/core/factory.py                 |  56 +-
 ethology/reid/core/handler.py                 |  21 +-
 ethology/reid/core/registry.py                | 133 +--
 ethology/reid/core/reid_handler.py            |  51 +-
 tests/test_unit/test_reid_handler.py          |  14 +-
 20 files changed, 3005 insertions(+), 2179 deletions(-)

diff --git a/ethology/reid/backbones/hacnn.py b/ethology/reid/backbones/hacnn.py
index e48ea2b7..9394ad30 100644
--- a/ethology/reid/backbones/hacnn.py
+++ b/ethology/reid/backbones/hacnn.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import torch
 from torch import nn
@@ -10,283 +9,315 @@
 
 
 class ConvBlock(nn.Module):
-	def __init__(self, in_c, out_c, k, s=1, p=0):
-		super(ConvBlock, self).__init__()
-		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
-		self.bn = nn.BatchNorm2d(out_c)
-	def forward(self, x):
-		return F.relu(self.bn(self.conv(x)))
+    def __init__(self, in_c, out_c, k, s=1, p=0):
+        super(ConvBlock, self).__init__()
+        self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
+        self.bn = nn.BatchNorm2d(out_c)
+
+    def forward(self, x):
+        return F.relu(self.bn(self.conv(x)))
+
 
 class InceptionA(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		super(InceptionA, self).__init__()
-		mid_channels = out_channels // 4
-		self.stream1 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-		)
-		self.stream2 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-		)
-		self.stream3 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-		)
-		self.stream4 = nn.Sequential(
-			nn.AvgPool2d(3, stride=1, padding=1),
-			ConvBlock(in_channels, mid_channels, 1),
-		)
-	def forward(self, x):
-		s1 = self.stream1(x)
-		s2 = self.stream2(x)
-		s3 = self.stream3(x)
-		s4 = self.stream4(x)
-		y = torch.cat([s1, s2, s3, s4], dim=1)
-		return y
+    def __init__(self, in_channels, out_channels):
+        super(InceptionA, self).__init__()
+        mid_channels = out_channels // 4
+        self.stream1 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+        )
+        self.stream2 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+        )
+        self.stream3 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+        )
+        self.stream4 = nn.Sequential(
+            nn.AvgPool2d(3, stride=1, padding=1),
+            ConvBlock(in_channels, mid_channels, 1),
+        )
+
+    def forward(self, x):
+        s1 = self.stream1(x)
+        s2 = self.stream2(x)
+        s3 = self.stream3(x)
+        s4 = self.stream4(x)
+        y = torch.cat([s1, s2, s3, s4], dim=1)
+        return y
+
 
 class InceptionB(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		super(InceptionB, self).__init__()
-		mid_channels = out_channels // 4
-		self.stream1 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
-		)
-		self.stream2 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
-		)
-		self.stream3 = nn.Sequential(
-			nn.MaxPool2d(3, stride=2, padding=1),
-			ConvBlock(in_channels, mid_channels * 2, 1),
-		)
-	def forward(self, x):
-		s1 = self.stream1(x)
-		s2 = self.stream2(x)
-		s3 = self.stream3(x)
-		y = torch.cat([s1, s2, s3], dim=1)
-		return y
+    def __init__(self, in_channels, out_channels):
+        super(InceptionB, self).__init__()
+        mid_channels = out_channels // 4
+        self.stream1 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+        )
+        self.stream2 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+        )
+        self.stream3 = nn.Sequential(
+            nn.MaxPool2d(3, stride=2, padding=1),
+            ConvBlock(in_channels, mid_channels * 2, 1),
+        )
+
+    def forward(self, x):
+        s1 = self.stream1(x)
+        s2 = self.stream2(x)
+        s3 = self.stream3(x)
+        y = torch.cat([s1, s2, s3], dim=1)
+        return y
+
 
 class SpatialAttn(nn.Module):
-	def __init__(self):
-		super(SpatialAttn, self).__init__()
-		self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
-		self.conv2 = ConvBlock(1, 1, 1)
-	def forward(self, x):
-		x = x.mean(1, keepdim=True)
-		x = self.conv1(x)
-		x = F.interpolate(
-			x, (x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
-		)
-		x = self.conv2(x)
-		return x
+    def __init__(self):
+        super(SpatialAttn, self).__init__()
+        self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
+        self.conv2 = ConvBlock(1, 1, 1)
+
+    def forward(self, x):
+        x = x.mean(1, keepdim=True)
+        x = self.conv1(x)
+        x = F.interpolate(
+            x,
+            (x.size(2) * 2, x.size(3) * 2),
+            mode="bilinear",
+            align_corners=True,
+        )
+        x = self.conv2(x)
+        return x
+
 
 class ChannelAttn(nn.Module):
-	def __init__(self, in_channels, reduction_rate=16):
-		super(ChannelAttn, self).__init__()
-		assert in_channels % reduction_rate == 0
-		self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
-		self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
-	def forward(self, x):
-		x = F.avg_pool2d(x, x.size()[2:])
-		x = self.conv1(x)
-		x = self.conv2(x)
-		return x
+    def __init__(self, in_channels, reduction_rate=16):
+        super(ChannelAttn, self).__init__()
+        assert in_channels % reduction_rate == 0
+        self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
+        self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
+
+    def forward(self, x):
+        x = F.avg_pool2d(x, x.size()[2:])
+        x = self.conv1(x)
+        x = self.conv2(x)
+        return x
+
 
 class SoftAttn(nn.Module):
-	def __init__(self, in_channels):
-		super(SoftAttn, self).__init__()
-		self.spatial_attn = SpatialAttn()
-		self.channel_attn = ChannelAttn(in_channels)
-		self.conv = ConvBlock(in_channels, in_channels, 1)
-	def forward(self, x):
-		y_spatial = self.spatial_attn(x)
-		y_channel = self.channel_attn(x)
-		y = y_spatial * y_channel
-		y = torch.sigmoid(self.conv(y))
-		return y
+    def __init__(self, in_channels):
+        super(SoftAttn, self).__init__()
+        self.spatial_attn = SpatialAttn()
+        self.channel_attn = ChannelAttn(in_channels)
+        self.conv = ConvBlock(in_channels, in_channels, 1)
+
+    def forward(self, x):
+        y_spatial = self.spatial_attn(x)
+        y_channel = self.channel_attn(x)
+        y = y_spatial * y_channel
+        y = torch.sigmoid(self.conv(y))
+        return y
+
 
 class HardAttn(nn.Module):
-	def __init__(self, in_channels):
-		super(HardAttn, self).__init__()
-		self.fc = nn.Linear(in_channels, 4 * 2)
-		self.init_params()
-	def init_params(self):
-		self.fc.weight.data.zero_()
-		self.fc.bias.data.copy_(
-			torch.tensor([0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float)
-		)
-	def forward(self, x):
-		x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
-		theta = torch.tanh(self.fc(x))
-		theta = theta.view(-1, 4, 2)
-		return theta
+    def __init__(self, in_channels):
+        super(HardAttn, self).__init__()
+        self.fc = nn.Linear(in_channels, 4 * 2)
+        self.init_params()
+
+    def init_params(self):
+        self.fc.weight.data.zero_()
+        self.fc.bias.data.copy_(
+            torch.tensor(
+                [0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float
+            )
+        )
+
+    def forward(self, x):
+        x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
+        theta = torch.tanh(self.fc(x))
+        theta = theta.view(-1, 4, 2)
+        return theta
+
 
 class HarmAttn(nn.Module):
-	def __init__(self, in_channels):
-		super(HarmAttn, self).__init__()
-		self.soft_attn = SoftAttn(in_channels)
-		self.hard_attn = HardAttn(in_channels)
-	def forward(self, x):
-		y_soft_attn = self.soft_attn(x)
-		theta = self.hard_attn(x)
-		return y_soft_attn, theta
+    def __init__(self, in_channels):
+        super(HarmAttn, self).__init__()
+        self.soft_attn = SoftAttn(in_channels)
+        self.hard_attn = HardAttn(in_channels)
+
+    def forward(self, x):
+        y_soft_attn = self.soft_attn(x)
+        theta = self.hard_attn(x)
+        return y_soft_attn, theta
+
 
 class HACNN(nn.Module):
-	def __init__(
-		self,
-		num_classes,
-		loss="softmax",
-		nchannels=[128, 256, 384],
-		feat_dim=512,
-		learn_region=True,
-		use_gpu=True,
-		**kwargs,
-	):
-		super(HACNN, self).__init__()
-		self.loss = loss
-		self.learn_region = learn_region
-		self.use_gpu = use_gpu
-		self.conv = ConvBlock(3, 32, 3, s=2, p=1)
-		self.inception1 = nn.Sequential(
-			InceptionA(32, nchannels[0]),
-			InceptionB(nchannels[0], nchannels[0]),
-		)
-		self.ha1 = HarmAttn(nchannels[0])
-		self.inception2 = nn.Sequential(
-			InceptionA(nchannels[0], nchannels[1]),
-			InceptionB(nchannels[1], nchannels[1]),
-		)
-		self.ha2 = HarmAttn(nchannels[1])
-		self.inception3 = nn.Sequential(
-			InceptionA(nchannels[1], nchannels[2]),
-			InceptionB(nchannels[2], nchannels[2]),
-		)
-		self.ha3 = HarmAttn(nchannels[2])
-		self.fc_global = nn.Sequential(
-			nn.Linear(nchannels[2], feat_dim),
-			nn.BatchNorm1d(feat_dim),
-			nn.ReLU(),
-		)
-		self.classifier_global = nn.Linear(feat_dim, num_classes)
-		if self.learn_region:
-			self.init_scale_factors()
-			self.local_conv1 = InceptionB(32, nchannels[0])
-			self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
-			self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
-			self.fc_local = nn.Sequential(
-				nn.Linear(nchannels[2] * 4, feat_dim),
-				nn.BatchNorm1d(feat_dim),
-				nn.ReLU(),
-			)
-			self.classifier_local = nn.Linear(feat_dim, num_classes)
-			self.feat_dim = feat_dim * 2
-		else:
-			self.feat_dim = feat_dim
-	def init_scale_factors(self):
-		self.scale_factors = []
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-	def stn(self, x, theta):
-		grid = F.affine_grid(theta, x.size())
-		x = F.grid_sample(x, grid)
-		return x
-	def transform_theta(self, theta_i, region_idx):
-		scale_factors = self.scale_factors[region_idx]
-		theta = torch.zeros(theta_i.size(0), 2, 3)
-		theta[:, :, :2] = scale_factors
-		theta[:, :, -1] = theta_i
-		if self.use_gpu:
-			theta = theta.to(next(self.parameters()).device)
-		return theta
-	def forward(self, x):
-		assert (
-			x.size(2) == 160 and x.size(3) == 64
-		), "Input size does not match, expected (160, 64) but got ({}, {})".format(
-			x.size(2), x.size(3)
-		)
-		x = self.conv(x)
-		x1 = self.inception1(x)
-		x1_attn, x1_theta = self.ha1(x1)
-		x1_out = x1 * x1_attn
-		if self.learn_region:
-			x1_local_list = []
-			for region_idx in range(4):
-				x1_theta_i = x1_theta[:, region_idx, :]
-				x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
-				x1_trans_i = self.stn(x, x1_theta_i)
-				x1_trans_i = F.interpolate(
-					x1_trans_i, (24, 28), mode="bilinear", align_corners=True
-				)
-				x1_local_i = self.local_conv1(x1_trans_i)
-				x1_local_list.append(x1_local_i)
-		x2 = self.inception2(x1_out)
-		x2_attn, x2_theta = self.ha2(x2)
-		x2_out = x2 * x2_attn
-		if self.learn_region:
-			x2_local_list = []
-			for region_idx in range(4):
-				x2_theta_i = x2_theta[:, region_idx, :]
-				x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
-				x2_trans_i = self.stn(x1_out, x2_theta_i)
-				x2_trans_i = F.interpolate(
-					x2_trans_i, (12, 14), mode="bilinear", align_corners=True
-				)
-				x2_local_i = x2_trans_i + x1_local_list[region_idx]
-				x2_local_i = self.local_conv2(x2_local_i)
-				x2_local_list.append(x2_local_i)
-		x3 = self.inception3(x2_out)
-		x3_attn, x3_theta = self.ha3(x3)
-		x3_out = x3 * x3_attn
-		if self.learn_region:
-			x3_local_list = []
-			for region_idx in range(4):
-				x3_theta_i = x3_theta[:, region_idx, :]
-				x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
-				x3_trans_i = self.stn(x2_out, x3_theta_i)
-				x3_trans_i = F.interpolate(
-					x3_trans_i, (6, 7), mode="bilinear", align_corners=True
-				)
-				x3_local_i = x3_trans_i + x2_local_list[region_idx]
-				x3_local_i = self.local_conv3(x3_local_i)
-				x3_local_list.append(x3_local_i)
-		x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
-			x3_out.size(0), x3_out.size(1)
-		)
-		x_global = self.fc_global(x_global)
-		if self.learn_region:
-			x_local_list = []
-			for region_idx in range(4):
-				x_local_i = x3_local_list[region_idx]
-				x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
-					x_local_i.size(0), -1
-				)
-				x_local_list.append(x_local_i)
-			x_local = torch.cat(x_local_list, 1)
-			x_local = self.fc_local(x_local)
-		if not self.training:
-			if self.learn_region:
-				x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
-				x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
-				return torch.cat([x_global, x_local], 1)
-			else:
-				return x_global
-		prelogits_global = self.classifier_global(x_global)
-		if self.learn_region:
-			prelogits_local = self.classifier_local(x_local)
-		if self.loss == "softmax":
-			if self.learn_region:
-				return (prelogits_global, prelogits_local)
-			else:
-				return prelogits_global
-		elif self.loss == "triplet":
-			if self.learn_region:
-				return (prelogits_global, prelogits_local), (x_global, x_local)
-			else:
-				return prelogits_global, x_global
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(
+        self,
+        num_classes,
+        loss="softmax",
+        nchannels=[128, 256, 384],
+        feat_dim=512,
+        learn_region=True,
+        use_gpu=True,
+        **kwargs,
+    ):
+        super(HACNN, self).__init__()
+        self.loss = loss
+        self.learn_region = learn_region
+        self.use_gpu = use_gpu
+        self.conv = ConvBlock(3, 32, 3, s=2, p=1)
+        self.inception1 = nn.Sequential(
+            InceptionA(32, nchannels[0]),
+            InceptionB(nchannels[0], nchannels[0]),
+        )
+        self.ha1 = HarmAttn(nchannels[0])
+        self.inception2 = nn.Sequential(
+            InceptionA(nchannels[0], nchannels[1]),
+            InceptionB(nchannels[1], nchannels[1]),
+        )
+        self.ha2 = HarmAttn(nchannels[1])
+        self.inception3 = nn.Sequential(
+            InceptionA(nchannels[1], nchannels[2]),
+            InceptionB(nchannels[2], nchannels[2]),
+        )
+        self.ha3 = HarmAttn(nchannels[2])
+        self.fc_global = nn.Sequential(
+            nn.Linear(nchannels[2], feat_dim),
+            nn.BatchNorm1d(feat_dim),
+            nn.ReLU(),
+        )
+        self.classifier_global = nn.Linear(feat_dim, num_classes)
+        if self.learn_region:
+            self.init_scale_factors()
+            self.local_conv1 = InceptionB(32, nchannels[0])
+            self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
+            self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
+            self.fc_local = nn.Sequential(
+                nn.Linear(nchannels[2] * 4, feat_dim),
+                nn.BatchNorm1d(feat_dim),
+                nn.ReLU(),
+            )
+            self.classifier_local = nn.Linear(feat_dim, num_classes)
+            self.feat_dim = feat_dim * 2
+        else:
+            self.feat_dim = feat_dim
+
+    def init_scale_factors(self):
+        self.scale_factors = []
+        self.scale_factors.append(
+            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
+        )
+        self.scale_factors.append(
+            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
+        )
+        self.scale_factors.append(
+            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
+        )
+        self.scale_factors.append(
+            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
+        )
+
+    def stn(self, x, theta):
+        grid = F.affine_grid(theta, x.size())
+        x = F.grid_sample(x, grid)
+        return x
+
+    def transform_theta(self, theta_i, region_idx):
+        scale_factors = self.scale_factors[region_idx]
+        theta = torch.zeros(theta_i.size(0), 2, 3)
+        theta[:, :, :2] = scale_factors
+        theta[:, :, -1] = theta_i
+        if self.use_gpu:
+            theta = theta.to(next(self.parameters()).device)
+        return theta
+
+    def forward(self, x):
+        assert x.size(2) == 160 and x.size(3) == 64, (
+            f"Input size does not match, expected (160, 64) but got ({x.size(2)}, {x.size(3)})"
+        )
+        x = self.conv(x)
+        x1 = self.inception1(x)
+        x1_attn, x1_theta = self.ha1(x1)
+        x1_out = x1 * x1_attn
+        if self.learn_region:
+            x1_local_list = []
+            for region_idx in range(4):
+                x1_theta_i = x1_theta[:, region_idx, :]
+                x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
+                x1_trans_i = self.stn(x, x1_theta_i)
+                x1_trans_i = F.interpolate(
+                    x1_trans_i, (24, 28), mode="bilinear", align_corners=True
+                )
+                x1_local_i = self.local_conv1(x1_trans_i)
+                x1_local_list.append(x1_local_i)
+        x2 = self.inception2(x1_out)
+        x2_attn, x2_theta = self.ha2(x2)
+        x2_out = x2 * x2_attn
+        if self.learn_region:
+            x2_local_list = []
+            for region_idx in range(4):
+                x2_theta_i = x2_theta[:, region_idx, :]
+                x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
+                x2_trans_i = self.stn(x1_out, x2_theta_i)
+                x2_trans_i = F.interpolate(
+                    x2_trans_i, (12, 14), mode="bilinear", align_corners=True
+                )
+                x2_local_i = x2_trans_i + x1_local_list[region_idx]
+                x2_local_i = self.local_conv2(x2_local_i)
+                x2_local_list.append(x2_local_i)
+        x3 = self.inception3(x2_out)
+        x3_attn, x3_theta = self.ha3(x3)
+        x3_out = x3 * x3_attn
+        if self.learn_region:
+            x3_local_list = []
+            for region_idx in range(4):
+                x3_theta_i = x3_theta[:, region_idx, :]
+                x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
+                x3_trans_i = self.stn(x2_out, x3_theta_i)
+                x3_trans_i = F.interpolate(
+                    x3_trans_i, (6, 7), mode="bilinear", align_corners=True
+                )
+                x3_local_i = x3_trans_i + x2_local_list[region_idx]
+                x3_local_i = self.local_conv3(x3_local_i)
+                x3_local_list.append(x3_local_i)
+        x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
+            x3_out.size(0), x3_out.size(1)
+        )
+        x_global = self.fc_global(x_global)
+        if self.learn_region:
+            x_local_list = []
+            for region_idx in range(4):
+                x_local_i = x3_local_list[region_idx]
+                x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
+                    x_local_i.size(0), -1
+                )
+                x_local_list.append(x_local_i)
+            x_local = torch.cat(x_local_list, 1)
+            x_local = self.fc_local(x_local)
+        if not self.training:
+            if self.learn_region:
+                x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
+                x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
+                return torch.cat([x_global, x_local], 1)
+            else:
+                return x_global
+        prelogits_global = self.classifier_global(x_global)
+        if self.learn_region:
+            prelogits_local = self.classifier_local(x_local)
+        if self.loss == "softmax":
+            if self.learn_region:
+                return (prelogits_global, prelogits_local)
+            else:
+                return prelogits_global
+        elif self.loss == "triplet":
+            if self.learn_region:
+                return (prelogits_global, prelogits_local), (x_global, x_local)
+            else:
+                return prelogits_global, x_global
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
index bb1e235c..8daad863 100644
--- a/ethology/reid/backbones/mlfn.py
+++ b/ethology/reid/backbones/mlfn.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 import torch
 import torch.utils.model_zoo as model_zoo
 from torch import nn
@@ -8,232 +7,266 @@
 
 __all__ = ["mlfn"]
 model_urls = {
-	# training epoch = 5, top1 = 51.6
-	"imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
+    # training epoch = 5, top1 = 51.6
+    "imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
 }
 
 
 class MLFNBlock(nn.Module):
-	def __init__(self, in_channels, out_channels, stride, fsm_channels, groups=32):
-		super(MLFNBlock, self).__init__()
-		self.groups = groups
-		mid_channels = out_channels // 2
-
-		# Factor Modules
-		self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False)
-		self.fm_bn1 = nn.BatchNorm2d(mid_channels)
-		self.fm_conv2 = nn.Conv2d(
-			mid_channels,
-			mid_channels,
-			3,
-			stride=stride,
-			padding=1,
-			bias=False,
-			groups=self.groups,
-		)
-		self.fm_bn2 = nn.BatchNorm2d(mid_channels)
-		self.fm_conv3 = nn.Conv2d(mid_channels, out_channels, 1, bias=False)
-		self.fm_bn3 = nn.BatchNorm2d(out_channels)
-
-		# Factor Selection Module
-		self.fsm = nn.Sequential(
-			nn.AdaptiveAvgPool2d(1),
-			nn.Conv2d(in_channels, fsm_channels[0], 1),
-			nn.BatchNorm2d(fsm_channels[0]),
-			nn.ReLU(inplace=True),
-			nn.Conv2d(fsm_channels[0], fsm_channels[1], 1),
-			nn.BatchNorm2d(fsm_channels[1]),
-			nn.ReLU(inplace=True),
-			nn.Conv2d(fsm_channels[1], self.groups, 1),
-			nn.BatchNorm2d(self.groups),
-			nn.Sigmoid(),
-		)
-
-		self.downsample = None
-		if in_channels != out_channels or stride > 1:
-			self.downsample = nn.Sequential(
-				nn.Conv2d(in_channels, out_channels, 1, stride=stride, bias=False),
-				nn.BatchNorm2d(out_channels),
-			)
-
-	def forward(self, x):
-		residual = x
-		s = self.fsm(x)
-
-		# reduce dimension
-		x = self.fm_conv1(x)
-		x = self.fm_bn1(x)
-		x = F.relu(x, inplace=True)
-
-		# group convolution
-		x = self.fm_conv2(x)
-		x = self.fm_bn2(x)
-		x = F.relu(x, inplace=True)
-
-		# factor selection
-		b, c = x.size(0), x.size(1)
-		n = c // self.groups
-		ss = s.repeat(1, n, 1, 1)  # from (b, g, 1, 1) to (b, g*n=c, 1, 1)
-		ss = ss.view(b, n, self.groups, 1, 1)
-		ss = ss.permute(0, 2, 1, 3, 4).contiguous()
-		ss = ss.view(b, c, 1, 1)
-		x = ss * x
-
-		# recover dimension
-		x = self.fm_conv3(x)
-		x = self.fm_bn3(x)
-		x = F.relu(x, inplace=True)
-
-		if self.downsample is not None:
-			residual = self.downsample(residual)
-
-		return F.relu(residual + x, inplace=True), s
+    def __init__(
+        self, in_channels, out_channels, stride, fsm_channels, groups=32
+    ):
+        super(MLFNBlock, self).__init__()
+        self.groups = groups
+        mid_channels = out_channels // 2
+
+        # Factor Modules
+        self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False)
+        self.fm_bn1 = nn.BatchNorm2d(mid_channels)
+        self.fm_conv2 = nn.Conv2d(
+            mid_channels,
+            mid_channels,
+            3,
+            stride=stride,
+            padding=1,
+            bias=False,
+            groups=self.groups,
+        )
+        self.fm_bn2 = nn.BatchNorm2d(mid_channels)
+        self.fm_conv3 = nn.Conv2d(mid_channels, out_channels, 1, bias=False)
+        self.fm_bn3 = nn.BatchNorm2d(out_channels)
+
+        # Factor Selection Module
+        self.fsm = nn.Sequential(
+            nn.AdaptiveAvgPool2d(1),
+            nn.Conv2d(in_channels, fsm_channels[0], 1),
+            nn.BatchNorm2d(fsm_channels[0]),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(fsm_channels[0], fsm_channels[1], 1),
+            nn.BatchNorm2d(fsm_channels[1]),
+            nn.ReLU(inplace=True),
+            nn.Conv2d(fsm_channels[1], self.groups, 1),
+            nn.BatchNorm2d(self.groups),
+            nn.Sigmoid(),
+        )
+
+        self.downsample = None
+        if in_channels != out_channels or stride > 1:
+            self.downsample = nn.Sequential(
+                nn.Conv2d(
+                    in_channels, out_channels, 1, stride=stride, bias=False
+                ),
+                nn.BatchNorm2d(out_channels),
+            )
+
+    def forward(self, x):
+        residual = x
+        s = self.fsm(x)
+
+        # reduce dimension
+        x = self.fm_conv1(x)
+        x = self.fm_bn1(x)
+        x = F.relu(x, inplace=True)
+
+        # group convolution
+        x = self.fm_conv2(x)
+        x = self.fm_bn2(x)
+        x = F.relu(x, inplace=True)
+
+        # factor selection
+        b, c = x.size(0), x.size(1)
+        n = c // self.groups
+        ss = s.repeat(1, n, 1, 1)  # from (b, g, 1, 1) to (b, g*n=c, 1, 1)
+        ss = ss.view(b, n, self.groups, 1, 1)
+        ss = ss.permute(0, 2, 1, 3, 4).contiguous()
+        ss = ss.view(b, c, 1, 1)
+        x = ss * x
+
+        # recover dimension
+        x = self.fm_conv3(x)
+        x = self.fm_bn3(x)
+        x = F.relu(x, inplace=True)
+
+        if self.downsample is not None:
+            residual = self.downsample(residual)
+
+        return F.relu(residual + x, inplace=True), s
 
 
 class MLFN(nn.Module):
-	"""Multi-Level Factorisation Net.
-
-	Reference:
-		Chang et al. Multi-Level Factorisation Net for
-		Person Re-Identification. CVPR 2018.
-
-	Public keys:
-		- ``mlfn``: MLFN (Multi-Level Factorisation Net).
-	"""
-
-	def __init__(
-		self,
-		num_classes,
-		loss="softmax",
-		groups=32,
-		channels=[64, 256, 512, 1024, 2048],
-		embed_dim=1024,
-		**kwargs,
-	):
-		super(MLFN, self).__init__()
-		self.loss = loss
-		self.groups = groups
-
-		# first convolutional layer
-		self.conv1 = nn.Conv2d(3, channels[0], 7, stride=2, padding=3)
-		self.bn1 = nn.BatchNorm2d(channels[0])
-		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-
-		# main body
-		self.feature = nn.ModuleList(
-			[
-				# layer 1-3
-				MLFNBlock(channels[0], channels[1], 1, [128, 64], self.groups),
-				MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
-				MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
-				# layer 4-7
-				MLFNBlock(channels[1], channels[2], 2, [256, 128], self.groups),
-				MLFNBlock(channels[2], channels[2], 1, [256, 128], self.groups),
-				MLFNBlock(channels[2], channels[2], 1, [256, 128], self.groups),
-				MLFNBlock(channels[2], channels[2], 1, [256, 128], self.groups),
-				# layer 8-13
-				MLFNBlock(channels[2], channels[3], 2, [512, 128], self.groups),
-				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
-				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
-				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
-				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
-				MLFNBlock(channels[3], channels[3], 1, [512, 128], self.groups),
-				# layer 14-16
-				MLFNBlock(channels[3], channels[4], 2, [512, 128], self.groups),
-				MLFNBlock(channels[4], channels[4], 1, [512, 128], self.groups),
-				MLFNBlock(channels[4], channels[4], 1, [512, 128], self.groups),
-			]
-		)
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-
-		# projection functions
-		self.fc_x = nn.Sequential(
-			nn.Conv2d(channels[4], embed_dim, 1, bias=False),
-			nn.BatchNorm2d(embed_dim),
-			nn.ReLU(inplace=True),
-		)
-		self.fc_s = nn.Sequential(
-			nn.Conv2d(self.groups * 16, embed_dim, 1, bias=False),
-			nn.BatchNorm2d(embed_dim),
-			nn.ReLU(inplace=True),
-		)
-
-		self.classifier = nn.Linear(embed_dim, num_classes)
-
-		self.init_params()
-
-	def init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-
-	def forward(self, x):
-		x = self.conv1(x)
-		x = self.bn1(x)
-		x = F.relu(x, inplace=True)
-		x = self.maxpool(x)
-
-		s_hat = []
-		for block in self.feature:
-			x, s = block(x)
-			s_hat.append(s)
-		s_hat = torch.cat(s_hat, 1)
-
-		x = self.global_avgpool(x)
-		x = self.fc_x(x)
-		s_hat = self.fc_s(s_hat)
-
-		v = (x + s_hat) * 0.5
-		v = v.view(v.size(0), -1)
-
-		if not self.training:
-			return v
-
-		y = self.classifier(v)
-
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    """Multi-Level Factorisation Net.
+
+    Reference:
+            Chang et al. Multi-Level Factorisation Net for
+            Person Re-Identification. CVPR 2018.
+
+    Public keys:
+            - ``mlfn``: MLFN (Multi-Level Factorisation Net).
+    """
+
+    def __init__(
+        self,
+        num_classes,
+        loss="softmax",
+        groups=32,
+        channels=[64, 256, 512, 1024, 2048],
+        embed_dim=1024,
+        **kwargs,
+    ):
+        super(MLFN, self).__init__()
+        self.loss = loss
+        self.groups = groups
+
+        # first convolutional layer
+        self.conv1 = nn.Conv2d(3, channels[0], 7, stride=2, padding=3)
+        self.bn1 = nn.BatchNorm2d(channels[0])
+        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+
+        # main body
+        self.feature = nn.ModuleList(
+            [
+                # layer 1-3
+                MLFNBlock(channels[0], channels[1], 1, [128, 64], self.groups),
+                MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
+                MLFNBlock(channels[1], channels[1], 1, [128, 64], self.groups),
+                # layer 4-7
+                MLFNBlock(
+                    channels[1], channels[2], 2, [256, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[2], channels[2], 1, [256, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[2], channels[2], 1, [256, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[2], channels[2], 1, [256, 128], self.groups
+                ),
+                # layer 8-13
+                MLFNBlock(
+                    channels[2], channels[3], 2, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[3], channels[3], 1, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[3], channels[3], 1, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[3], channels[3], 1, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[3], channels[3], 1, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[3], channels[3], 1, [512, 128], self.groups
+                ),
+                # layer 14-16
+                MLFNBlock(
+                    channels[3], channels[4], 2, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[4], channels[4], 1, [512, 128], self.groups
+                ),
+                MLFNBlock(
+                    channels[4], channels[4], 1, [512, 128], self.groups
+                ),
+            ]
+        )
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+
+        # projection functions
+        self.fc_x = nn.Sequential(
+            nn.Conv2d(channels[4], embed_dim, 1, bias=False),
+            nn.BatchNorm2d(embed_dim),
+            nn.ReLU(inplace=True),
+        )
+        self.fc_s = nn.Sequential(
+            nn.Conv2d(self.groups * 16, embed_dim, 1, bias=False),
+            nn.BatchNorm2d(embed_dim),
+            nn.ReLU(inplace=True),
+        )
+
+        self.classifier = nn.Linear(embed_dim, num_classes)
+
+        self.init_params()
+
+    def init_params(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = F.relu(x, inplace=True)
+        x = self.maxpool(x)
+
+        s_hat = []
+        for block in self.feature:
+            x, s = block(x)
+            s_hat.append(s)
+        s_hat = torch.cat(s_hat, 1)
+
+        x = self.global_avgpool(x)
+        x = self.fc_x(x)
+        s_hat = self.fc_s(s_hat)
+
+        v = (x + s_hat) * 0.5
+        v = v.view(v.size(0), -1)
+
+        if not self.training:
+            return v
+
+        y = self.classifier(v)
+
+        if self.loss == "softmax":
+            return y
+        elif self.loss == "triplet":
+            return y, v
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
 
 
 def init_pretrained_weights(model, model_url):
-	"""Initializes model with pretrained weights.
+    """Initializes model with pretrained weights.
 
-	Layers that don't match with pretrained layers in name or size are kept unchanged.
-	"""
-	pretrain_dict = model_zoo.load_url(model_url)
-	model_dict = model.state_dict()
-	pretrain_dict = {
-		k: v
-		for k, v in pretrain_dict.items()
-		if k in model_dict and model_dict[k].size() == v.size()
-	}
-	model_dict.update(pretrain_dict)
-	model.load_state_dict(model_dict)
+    Layers that don't match with pretrained layers in name or size are kept unchanged.
+    """
+    pretrain_dict = model_zoo.load_url(model_url)
+    model_dict = model.state_dict()
+    pretrain_dict = {
+        k: v
+        for k, v in pretrain_dict.items()
+        if k in model_dict and model_dict[k].size() == v.size()
+    }
+    model_dict.update(pretrain_dict)
+    model.load_state_dict(model_dict)
 
 
 def mlfn(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = MLFN(num_classes, loss, **kwargs)
-	if pretrained:
-		# init_pretrained_weights(model, model_urls['imagenet'])
-		import warnings
-
-		warnings.warn(
-			"The imagenet pretrained weights need to be manually downloaded from {}".format(
-				model_urls["imagenet"]
-			)
-		)
-	return model
+    model = MLFN(num_classes, loss, **kwargs)
+    if pretrained:
+        # init_pretrained_weights(model, model_urls['imagenet'])
+        import warnings
+
+        warnings.warn(
+            "The imagenet pretrained weights need to be manually downloaded from {}".format(
+                model_urls["imagenet"]
+            )
+        )
+    return model
+
+
 # Copied from boxmot/boxmot/reid/backbones/mlfn.py
diff --git a/ethology/reid/backbones/mobilenetv2.py b/ethology/reid/backbones/mobilenetv2.py
index 35a16219..b3e69186 100644
--- a/ethology/reid/backbones/mobilenetv2.py
+++ b/ethology/reid/backbones/mobilenetv2.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import torch.utils.model_zoo as model_zoo
 from torch import nn
@@ -9,239 +8,272 @@
 __all__ = ["mobilenetv2_x1_0", "mobilenetv2_x1_4"]
 
 model_urls = {
-	# 1.0: top-1 71.3
-	"mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
-	# 1.4: top-1 73.9
-	"mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
+    # 1.0: top-1 71.3
+    "mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
+    # 1.4: top-1 73.9
+    "mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
 }
 
 
 class ConvBlock(nn.Module):
-	"""Basic convolutional block.
+    """Basic convolutional block.
 
-	convolution (bias discarded) + batch normalization + relu6.
+    convolution (bias discarded) + batch normalization + relu6.
 
-	Args:
-		in_c (int): number of input channels.
-		out_c (int): number of output channels.
-		k (int or tuple): kernel size.
-		s (int or tuple): stride.
-		p (int or tuple): padding.
-		g (int): number of blocked connections from input channels
-			to output channels (default: 1).
-	"""
+    Args:
+            in_c (int): number of input channels.
+            out_c (int): number of output channels.
+            k (int or tuple): kernel size.
+            s (int or tuple): stride.
+            p (int or tuple): padding.
+            g (int): number of blocked connections from input channels
+                    to output channels (default: 1).
 
-	def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
-		super(ConvBlock, self).__init__()
-		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
-		self.bn = nn.BatchNorm2d(out_c)
+    """
 
-	def forward(self, x):
-		return F.relu6(self.bn(self.conv(x)))
+    def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
+        super(ConvBlock, self).__init__()
+        self.conv = nn.Conv2d(
+            in_c, out_c, k, stride=s, padding=p, bias=False, groups=g
+        )
+        self.bn = nn.BatchNorm2d(out_c)
+
+    def forward(self, x):
+        return F.relu6(self.bn(self.conv(x)))
 
 
 class Bottleneck(nn.Module):
-	def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
-		super(Bottleneck, self).__init__()
-		mid_channels = in_channels * expansion_factor
-		self.use_residual = stride == 1 and in_channels == out_channels
-		self.conv1 = ConvBlock(in_channels, mid_channels, 1)
-		self.dwconv2 = ConvBlock(
-			mid_channels, mid_channels, 3, stride, 1, g=mid_channels
-		)
-		self.conv3 = nn.Sequential(
-			nn.Conv2d(mid_channels, out_channels, 1, bias=False),
-			nn.BatchNorm2d(out_channels),
-		)
-
-	def forward(self, x):
-		m = self.conv1(x)
-		m = self.dwconv2(m)
-		m = self.conv3(m)
-		if self.use_residual:
-			return x + m
-		else:
-			return m
+    def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
+        super(Bottleneck, self).__init__()
+        mid_channels = in_channels * expansion_factor
+        self.use_residual = stride == 1 and in_channels == out_channels
+        self.conv1 = ConvBlock(in_channels, mid_channels, 1)
+        self.dwconv2 = ConvBlock(
+            mid_channels, mid_channels, 3, stride, 1, g=mid_channels
+        )
+        self.conv3 = nn.Sequential(
+            nn.Conv2d(mid_channels, out_channels, 1, bias=False),
+            nn.BatchNorm2d(out_channels),
+        )
+
+    def forward(self, x):
+        m = self.conv1(x)
+        m = self.dwconv2(m)
+        m = self.conv3(m)
+        if self.use_residual:
+            return x + m
+        else:
+            return m
 
 
 class MobileNetV2(nn.Module):
-	"""MobileNetV2.
-
-	Reference:
-		Sandler et al. MobileNetV2: Inverted Residuals and
-		Linear Bottlenecks. CVPR 2018.
-
-	Public keys:
-		- ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
-		- ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
-	"""
-
-	def __init__(
-		self,
-		num_classes,
-		width_mult=1,
-		loss="softmax",
-		fc_dims=None,
-		dropout_p=None,
-		**kwargs,
-	):
-		super(MobileNetV2, self).__init__()
-		self.loss = loss
-		self.in_channels = int(32 * width_mult)
-		self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
-
-		# construct layers
-		self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
-		self.conv2 = self._make_layer(Bottleneck, 1, int(16 * width_mult), 1, 1)
-		self.conv3 = self._make_layer(Bottleneck, 6, int(24 * width_mult), 2, 2)
-		self.conv4 = self._make_layer(Bottleneck, 6, int(32 * width_mult), 3, 2)
-		self.conv5 = self._make_layer(Bottleneck, 6, int(64 * width_mult), 4, 2)
-		self.conv6 = self._make_layer(Bottleneck, 6, int(96 * width_mult), 3, 1)
-		self.conv7 = self._make_layer(Bottleneck, 6, int(160 * width_mult), 3, 2)
-		self.conv8 = self._make_layer(Bottleneck, 6, int(320 * width_mult), 1, 1)
-		self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
-
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc = self._construct_fc_layer(fc_dims, self.feature_dim, dropout_p)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-
-		self._init_params()
-
-	def _make_layer(self, block, t, c, n, s):
-		# t: expansion factor
-		# c: output channels
-		# n: number of blocks
-		# s: stride for first layer
-		layers = []
-		layers.append(block(self.in_channels, c, t, s))
-		self.in_channels = c
-		for i in range(1, n):
-			layers.append(block(self.in_channels, c, t))
-		return nn.Sequential(*layers)
-
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		"""Constructs fully connected layer.
-
-		Args:
-			fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
-			input_dim (int): input dimension
-			dropout_p (float): dropout probability, if None, dropout is unused
-		"""
-		if fc_dims is None:
-			self.feature_dim = input_dim
-			return None
-
-		assert isinstance(
-			fc_dims, (list, tuple)
-		), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
-
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU(inplace=True))
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-
-		self.feature_dim = fc_dims[-1]
-
-		return nn.Sequential(*layers)
-
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.conv2(x)
-		x = self.conv3(x)
-		x = self.conv4(x)
-		x = self.conv5(x)
-		x = self.conv6(x)
-		x = self.conv7(x)
-		x = self.conv8(x)
-		x = self.conv9(x)
-		return x
-
-	def forward(self, x):
-		f = self.featuremaps(x)
-		v = self.global_avgpool(f)
-		v = v.view(v.size(0), -1)
-
-		if self.fc is not None:
-			v = self.fc(v)
-
-		if not self.training:
-			return v
-
-		y = self.classifier(v)
-
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    """MobileNetV2.
+
+    Reference:
+            Sandler et al. MobileNetV2: Inverted Residuals and
+            Linear Bottlenecks. CVPR 2018.
+
+    Public keys:
+            - ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
+            - ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
+    """
+
+    def __init__(
+        self,
+        num_classes,
+        width_mult=1,
+        loss="softmax",
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    ):
+        super(MobileNetV2, self).__init__()
+        self.loss = loss
+        self.in_channels = int(32 * width_mult)
+        self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
+
+        # construct layers
+        self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
+        self.conv2 = self._make_layer(
+            Bottleneck, 1, int(16 * width_mult), 1, 1
+        )
+        self.conv3 = self._make_layer(
+            Bottleneck, 6, int(24 * width_mult), 2, 2
+        )
+        self.conv4 = self._make_layer(
+            Bottleneck, 6, int(32 * width_mult), 3, 2
+        )
+        self.conv5 = self._make_layer(
+            Bottleneck, 6, int(64 * width_mult), 4, 2
+        )
+        self.conv6 = self._make_layer(
+            Bottleneck, 6, int(96 * width_mult), 3, 1
+        )
+        self.conv7 = self._make_layer(
+            Bottleneck, 6, int(160 * width_mult), 3, 2
+        )
+        self.conv8 = self._make_layer(
+            Bottleneck, 6, int(320 * width_mult), 1, 1
+        )
+        self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
+
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc = self._construct_fc_layer(
+            fc_dims, self.feature_dim, dropout_p
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+
+        self._init_params()
+
+    def _make_layer(self, block, t, c, n, s):
+        # t: expansion factor
+        # c: output channels
+        # n: number of blocks
+        # s: stride for first layer
+        layers = []
+        layers.append(block(self.in_channels, c, t, s))
+        self.in_channels = c
+        for i in range(1, n):
+            layers.append(block(self.in_channels, c, t))
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        """Constructs fully connected layer.
+
+        Args:
+                fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
+                input_dim (int): input dimension
+                dropout_p (float): dropout probability, if None, dropout is unused
+
+        """
+        if fc_dims is None:
+            self.feature_dim = input_dim
+            return None
+
+        assert isinstance(fc_dims, (list, tuple)), (
+            f"fc_dims must be either list or tuple, but got {type(fc_dims)}"
+        )
+
+        layers = []
+        for dim in fc_dims:
+            layers.append(nn.Linear(input_dim, dim))
+            layers.append(nn.BatchNorm1d(dim))
+            layers.append(nn.ReLU(inplace=True))
+            if dropout_p is not None:
+                layers.append(nn.Dropout(p=dropout_p))
+            input_dim = dim
+
+        self.feature_dim = fc_dims[-1]
+
+        return nn.Sequential(*layers)
+
+    def _init_params(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(
+                m, nn.BatchNorm1d
+            ):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def featuremaps(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        x = self.conv6(x)
+        x = self.conv7(x)
+        x = self.conv8(x)
+        x = self.conv9(x)
+        return x
+
+    def forward(self, x):
+        f = self.featuremaps(x)
+        v = self.global_avgpool(f)
+        v = v.view(v.size(0), -1)
+
+        if self.fc is not None:
+            v = self.fc(v)
+
+        if not self.training:
+            return v
+
+        y = self.classifier(v)
+
+        if self.loss == "softmax":
+            return y
+        elif self.loss == "triplet":
+            return y, v
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
 
 
 def init_pretrained_weights(model, model_url):
-	"""Initializes model with pretrained weights.
+    """Initializes model with pretrained weights.
 
-	Layers that don't match with pretrained layers in name or size are kept unchanged.
-	"""
-	pretrain_dict = model_zoo.load_url(model_url)
-	model_dict = model.state_dict()
-	pretrain_dict = {
-		k: v
-		for k, v in pretrain_dict.items()
-		if k in model_dict and model_dict[k].size() == v.size()
-	}
-	model_dict.update(pretrain_dict)
-	model.load_state_dict(model_dict)
+    Layers that don't match with pretrained layers in name or size are kept unchanged.
+    """
+    pretrain_dict = model_zoo.load_url(model_url)
+    model_dict = model.state_dict()
+    pretrain_dict = {
+        k: v
+        for k, v in pretrain_dict.items()
+        if k in model_dict and model_dict[k].size() == v.size()
+    }
+    model_dict.update(pretrain_dict)
+    model.load_state_dict(model_dict)
 
 
 def mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs):
-	model = MobileNetV2(
-		num_classes, loss=loss, width_mult=1, fc_dims=None, dropout_p=None, **kwargs
-	)
-	if pretrained:
-		# init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
-		import warnings
-
-		warnings.warn(
-			"The imagenet pretrained weights need to be manually downloaded from {}".format(
-				model_urls["mobilenetv2_x1_0"]
-			)
-		)
-	return model
+    model = MobileNetV2(
+        num_classes,
+        loss=loss,
+        width_mult=1,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
+        import warnings
+
+        warnings.warn(
+            "The imagenet pretrained weights need to be manually downloaded from {}".format(
+                model_urls["mobilenetv2_x1_0"]
+            )
+        )
+    return model
 
 
 def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):
-	model = MobileNetV2(
-		num_classes, loss=loss, width_mult=1.4, fc_dims=None, dropout_p=None, **kwargs
-	)
-	if pretrained:
-		# init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
-		import warnings
-
-		warnings.warn(
-			"The imagenet pretrained weights need to be manually downloaded from {}".format(
-				model_urls["mobilenetv2_x1_4"]
-			)
-		)
-	return model
+    model = MobileNetV2(
+        num_classes,
+        loss=loss,
+        width_mult=1.4,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
+        import warnings
+
+        warnings.warn(
+            "The imagenet pretrained weights need to be manually downloaded from {}".format(
+                model_urls["mobilenetv2_x1_4"]
+            )
+        )
+    return model
+
+
 # Copied from boxmot/boxmot/reid/backbones/mobilenetv2.py
diff --git a/ethology/reid/backbones/osnet.py b/ethology/reid/backbones/osnet.py
index c07e4e45..c13dd5b7 100644
--- a/ethology/reid/backbones/osnet.py
+++ b/ethology/reid/backbones/osnet.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import warnings
 
@@ -8,338 +7,529 @@
 from torch import nn
 from torch.nn import functional as F
 
-__all__ = ["osnet_x1_0", "osnet_x0_75", "osnet_x0_5", "osnet_x0_25", "osnet_ibn_x1_0"]
+__all__ = [
+    "osnet_x1_0",
+    "osnet_x0_75",
+    "osnet_x0_5",
+    "osnet_x0_25",
+    "osnet_ibn_x1_0",
+]
 
 pretrained_urls = {
-	"osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
-	"osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
-	"osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
-	"osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
-	"osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
+    "osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
+    "osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
+    "osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
+    "osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
+    "osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
 }
 
 # ...existing code for ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, ChannelGate, OSBlock...
 
+
 class ConvLayer(nn.Module):
-	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
-		super(ConvLayer, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups)
-		if IN:
-			self.bn = nn.InstanceNorm2d(out_channels, affine=True)
-		else:
-			self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        groups=1,
+        IN=False,
+    ):
+        super(ConvLayer, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            bias=False,
+            groups=groups,
+        )
+        if IN:
+            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
+        else:
+            self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class Conv1x1(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv1x1, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv1x1, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            1,
+            stride=stride,
+            padding=0,
+            bias=False,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class Conv1x1Linear(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1):
-		super(Conv1x1Linear, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
-		self.bn = nn.BatchNorm2d(out_channels)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1):
+        super(Conv1x1Linear, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
 
 class Conv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv3x3, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv3x3, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            3,
+            stride=stride,
+            padding=1,
+            bias=False,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class LightConv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		super(LightConv3x3, self).__init__()
-		self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
-		self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv1(x)
-		x = self.conv2(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(self, in_channels, out_channels):
+        super(LightConv3x3, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels, out_channels, 1, stride=1, padding=0, bias=False
+        )
+        self.conv2 = nn.Conv2d(
+            out_channels,
+            out_channels,
+            3,
+            stride=1,
+            padding=1,
+            bias=False,
+            groups=out_channels,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class ChannelGate(nn.Module):
-	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
-		super(ChannelGate, self).__init__()
-		if num_gates is None:
-			num_gates = in_channels
-		self.return_gates = return_gates
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0)
-		self.norm1 = None
-		if layer_norm:
-			self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
-		self.relu = nn.ReLU(inplace=True)
-		self.fc2 = nn.Conv2d(in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0)
-		if gate_activation == "sigmoid":
-			self.gate_activation = nn.Sigmoid()
-		elif gate_activation == "relu":
-			self.gate_activation = nn.ReLU(inplace=True)
-		elif gate_activation == "linear":
-			self.gate_activation = None
-		else:
-			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
-	def forward(self, x):
-		input = x
-		x = self.global_avgpool(x)
-		x = self.fc1(x)
-		if self.norm1 is not None:
-			x = self.norm1(x)
-		x = self.relu(x)
-		x = self.fc2(x)
-		if self.gate_activation is not None:
-			x = self.gate_activation(x)
-		if self.return_gates:
-			return x
-		return input * x
+    def __init__(
+        self,
+        in_channels,
+        num_gates=None,
+        return_gates=False,
+        gate_activation="sigmoid",
+        reduction=16,
+        layer_norm=False,
+    ):
+        super(ChannelGate, self).__init__()
+        if num_gates is None:
+            num_gates = in_channels
+        self.return_gates = return_gates
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc1 = nn.Conv2d(
+            in_channels,
+            in_channels // reduction,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        self.norm1 = None
+        if layer_norm:
+            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
+        self.relu = nn.ReLU(inplace=True)
+        self.fc2 = nn.Conv2d(
+            in_channels // reduction,
+            num_gates,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        if gate_activation == "sigmoid":
+            self.gate_activation = nn.Sigmoid()
+        elif gate_activation == "relu":
+            self.gate_activation = nn.ReLU(inplace=True)
+        elif gate_activation == "linear":
+            self.gate_activation = None
+        else:
+            raise RuntimeError(f"Unknown gate activation: {gate_activation}")
+
+    def forward(self, x):
+        input = x
+        x = self.global_avgpool(x)
+        x = self.fc1(x)
+        if self.norm1 is not None:
+            x = self.norm1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        if self.gate_activation is not None:
+            x = self.gate_activation(x)
+        if self.return_gates:
+            return x
+        return input * x
+
 
 class OSBlock(nn.Module):
-	def __init__(self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs):
-		super(OSBlock, self).__init__()
-		mid_channels = out_channels // bottleneck_reduction
-		self.conv1 = Conv1x1(in_channels, mid_channels)
-		self.conv2a = LightConv3x3(mid_channels, mid_channels)
-		self.conv2b = nn.Sequential(
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-		)
-		self.conv2c = nn.Sequential(
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-		)
-		self.conv2d = nn.Sequential(
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-		)
-		self.gate = ChannelGate(mid_channels)
-		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
-		self.downsample = None
-		if in_channels != out_channels:
-			self.downsample = Conv1x1Linear(in_channels, out_channels)
-		self.IN = None
-		if IN:
-			self.IN = nn.InstanceNorm2d(out_channels, affine=True)
-	def forward(self, x):
-		identity = x
-		x1 = self.conv1(x)
-		x2a = self.conv2a(x1)
-		x2b = self.conv2b(x1)
-		x2c = self.conv2c(x1)
-		x2d = self.conv2d(x1)
-		x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
-		x3 = self.conv3(x2)
-		if self.downsample is not None:
-			identity = self.downsample(identity)
-		out = x3 + identity
-		if self.IN is not None:
-			out = self.IN(out)
-		return F.relu(out)
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        IN=False,
+        bottleneck_reduction=4,
+        **kwargs,
+    ):
+        super(OSBlock, self).__init__()
+        mid_channels = out_channels // bottleneck_reduction
+        self.conv1 = Conv1x1(in_channels, mid_channels)
+        self.conv2a = LightConv3x3(mid_channels, mid_channels)
+        self.conv2b = nn.Sequential(
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+        )
+        self.conv2c = nn.Sequential(
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+        )
+        self.conv2d = nn.Sequential(
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+        )
+        self.gate = ChannelGate(mid_channels)
+        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+        self.downsample = None
+        if in_channels != out_channels:
+            self.downsample = Conv1x1Linear(in_channels, out_channels)
+        self.IN = None
+        if IN:
+            self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+
+    def forward(self, x):
+        identity = x
+        x1 = self.conv1(x)
+        x2a = self.conv2a(x1)
+        x2b = self.conv2b(x1)
+        x2c = self.conv2c(x1)
+        x2d = self.conv2d(x1)
+        x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
+        x3 = self.conv3(x2)
+        if self.downsample is not None:
+            identity = self.downsample(identity)
+        out = x3 + identity
+        if self.IN is not None:
+            out = self.IN(out)
+        return F.relu(out)
+
 
 class OSNet(nn.Module):
-	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", IN=False, **kwargs):
-		super(OSNet, self).__init__()
-		num_blocks = len(blocks)
-		assert num_blocks == len(layers)
-		assert num_blocks == len(channels) - 1
-		self.loss = loss
-		self.feature_dim = feature_dim
-		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
-		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN)
-		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True)
-		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False)
-		self.conv5 = Conv1x1(channels[3], channels[3])
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-		self._init_params()
-	def _make_layer(self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False):
-		layers = []
-		layers.append(block(in_channels, out_channels, IN=IN))
-		for i in range(1, layer):
-			layers.append(block(out_channels, out_channels, IN=IN))
-		if reduce_spatial_size:
-			layers.append(nn.Sequential(Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2)))
-		return nn.Sequential(*layers)
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		if fc_dims is None or fc_dims < 0:
-			self.feature_dim = input_dim
-			return None
-		if isinstance(fc_dims, int):
-			fc_dims = [fc_dims]
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU(inplace=True))
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-		self.feature_dim = fc_dims[-1]
-		return nn.Sequential(*layers)
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.maxpool(x)
-		x = self.conv2(x)
-		x = self.conv3(x)
-		x = self.conv4(x)
-		x = self.conv5(x)
-		return x
-	def forward(self, x, return_featuremaps=False):
-		x = self.featuremaps(x)
-		if return_featuremaps:
-			return x
-		v = self.global_avgpool(x)
-		v = v.view(v.size(0), -1)
-		if self.fc is not None:
-			v = self.fc(v)
-		if not self.training:
-			return v
-		y = self.classifier(v)
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(
+        self,
+        num_classes,
+        blocks,
+        layers,
+        channels,
+        feature_dim=512,
+        loss="softmax",
+        IN=False,
+        **kwargs,
+    ):
+        super(OSNet, self).__init__()
+        num_blocks = len(blocks)
+        assert num_blocks == len(layers)
+        assert num_blocks == len(channels) - 1
+        self.loss = loss
+        self.feature_dim = feature_dim
+        self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
+        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+        self.conv2 = self._make_layer(
+            blocks[0],
+            layers[0],
+            channels[0],
+            channels[1],
+            reduce_spatial_size=True,
+            IN=IN,
+        )
+        self.conv3 = self._make_layer(
+            blocks[1],
+            layers[1],
+            channels[1],
+            channels[2],
+            reduce_spatial_size=True,
+        )
+        self.conv4 = self._make_layer(
+            blocks[2],
+            layers[2],
+            channels[2],
+            channels[3],
+            reduce_spatial_size=False,
+        )
+        self.conv5 = Conv1x1(channels[3], channels[3])
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc = self._construct_fc_layer(
+            self.feature_dim, channels[3], dropout_p=None
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+        self._init_params()
+
+    def _make_layer(
+        self,
+        block,
+        layer,
+        in_channels,
+        out_channels,
+        reduce_spatial_size,
+        IN=False,
+    ):
+        layers = []
+        layers.append(block(in_channels, out_channels, IN=IN))
+        for i in range(1, layer):
+            layers.append(block(out_channels, out_channels, IN=IN))
+        if reduce_spatial_size:
+            layers.append(
+                nn.Sequential(
+                    Conv1x1(out_channels, out_channels),
+                    nn.AvgPool2d(2, stride=2),
+                )
+            )
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        if fc_dims is None or fc_dims < 0:
+            self.feature_dim = input_dim
+            return None
+        if isinstance(fc_dims, int):
+            fc_dims = [fc_dims]
+        layers = []
+        for dim in fc_dims:
+            layers.append(nn.Linear(input_dim, dim))
+            layers.append(nn.BatchNorm1d(dim))
+            layers.append(nn.ReLU(inplace=True))
+            if dropout_p is not None:
+                layers.append(nn.Dropout(p=dropout_p))
+            input_dim = dim
+        self.feature_dim = fc_dims[-1]
+        return nn.Sequential(*layers)
+
+    def _init_params(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(
+                m, nn.BatchNorm1d
+            ):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def featuremaps(self, x):
+        x = self.conv1(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        return x
+
+    def forward(self, x, return_featuremaps=False):
+        x = self.featuremaps(x)
+        if return_featuremaps:
+            return x
+        v = self.global_avgpool(x)
+        v = v.view(v.size(0), -1)
+        if self.fc is not None:
+            v = self.fc(v)
+        if not self.training:
+            return v
+        y = self.classifier(v)
+        if self.loss == "softmax":
+            return y
+        elif self.loss == "triplet":
+            return y, v
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
+
 
 def init_pretrained_weights(model, key=""):
-	import errno
-	import os
-	from collections import OrderedDict
-	import gdown
-	def _get_torch_home():
-		ENV_TORCH_HOME = "TORCH_HOME"
-		ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
-		DEFAULT_CACHE_DIR = "~/.cache"
-		torch_home = os.path.expanduser(
-			os.getenv(
-				ENV_TORCH_HOME,
-				os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"),
-			)
-		)
-		return torch_home
-	filename = key + "_imagenet.pth"
-	# Try ethology/models/ directory first
-	ethology_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
-	models_dir = os.path.join(ethology_root, "models")
-	os.makedirs(models_dir, exist_ok=True)
-	local_file = os.path.join(models_dir, filename)
-	torch_home = _get_torch_home()
-	model_dir = os.path.join(torch_home, "checkpoints")
-	os.makedirs(model_dir, exist_ok=True)
-	cached_file = os.path.join(model_dir, filename)
-	# Prefer ethology/models/ directory file if present
-	if os.path.exists(local_file):
-		print(f"[OSNet] Loading model weights from {local_file}")
-		cached_file = local_file
-	elif os.path.exists(cached_file):
-		print(f"[OSNet] Loading model weights from {cached_file}")
-	else:
-		print(f"[OSNet] Downloading model weights to {cached_file}")
-		gdown.download(pretrained_urls[key], cached_file, quiet=False)
-	state_dict = torch.load(cached_file)
-	model_dict = model.state_dict()
-	new_state_dict = OrderedDict()
-	matched_layers, discarded_layers = [], []
-	for k, v in state_dict.items():
-		if k.startswith("module."):
-			k = k[7:]
-		if k in model_dict and model_dict[k].size() == v.size():
-			new_state_dict[k] = v
-			matched_layers.append(k)
-		else:
-			discarded_layers.append(k)
-	model_dict.update(new_state_dict)
-	model.load_state_dict(model_dict)
-	if len(matched_layers) == 0:
-		warnings.warn(
-			'The pretrained weights from "{}" cannot be loaded, '
-			"please check the key names manually "
-			"(** ignored and continue **)".format(cached_file)
-		)
-	else:
-		print(
-			'Successfully loaded imagenet pretrained weights from "{}"'.format(
-				cached_file
-			)
-		)
-		if len(discarded_layers) > 0:
-			print(
-				"** The following layers are discarded "
-				"due to unmatched keys or layer size: {}".format(discarded_layers)
-			)
+    import os
+    from collections import OrderedDict
+
+    import gdown
+
+    def _get_torch_home():
+        ENV_TORCH_HOME = "TORCH_HOME"
+        ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
+        DEFAULT_CACHE_DIR = "~/.cache"
+        torch_home = os.path.expanduser(
+            os.getenv(
+                ENV_TORCH_HOME,
+                os.path.join(
+                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"
+                ),
+            )
+        )
+        return torch_home
+
+    filename = key + "_imagenet.pth"
+    # Try ethology/models/ directory first
+    ethology_root = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), "../../../")
+    )
+    models_dir = os.path.join(ethology_root, "models")
+    os.makedirs(models_dir, exist_ok=True)
+    local_file = os.path.join(models_dir, filename)
+    torch_home = _get_torch_home()
+    model_dir = os.path.join(torch_home, "checkpoints")
+    os.makedirs(model_dir, exist_ok=True)
+    cached_file = os.path.join(model_dir, filename)
+    # Prefer ethology/models/ directory file if present
+    if os.path.exists(local_file):
+        print(f"[OSNet] Loading model weights from {local_file}")
+        cached_file = local_file
+    elif os.path.exists(cached_file):
+        print(f"[OSNet] Loading model weights from {cached_file}")
+    else:
+        print(f"[OSNet] Downloading model weights to {cached_file}")
+        gdown.download(pretrained_urls[key], cached_file, quiet=False)
+    state_dict = torch.load(cached_file)
+    model_dict = model.state_dict()
+    new_state_dict = OrderedDict()
+    matched_layers, discarded_layers = [], []
+    for k, v in state_dict.items():
+        if k.startswith("module."):
+            k = k[7:]
+        if k in model_dict and model_dict[k].size() == v.size():
+            new_state_dict[k] = v
+            matched_layers.append(k)
+        else:
+            discarded_layers.append(k)
+    model_dict.update(new_state_dict)
+    model.load_state_dict(model_dict)
+    if len(matched_layers) == 0:
+        warnings.warn(
+            f'The pretrained weights from "{cached_file}" cannot be loaded, '
+            "please check the key names manually "
+            "(** ignored and continue **)"
+        )
+    else:
+        print(
+            f'Successfully loaded imagenet pretrained weights from "{cached_file}"'
+        )
+        if len(discarded_layers) > 0:
+            print(
+                "** The following layers are discarded "
+                f"due to unmatched keys or layer size: {discarded_layers}"
+            )
+
 
 def osnet_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x1_0")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[64, 256, 384, 512],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x1_0")
+    return model
+
 
 def osnet_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[48, 192, 288, 384], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x0_75")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[48, 192, 288, 384],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x0_75")
+    return model
+
 
 def osnet_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[32, 128, 192, 256], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x0_5")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[32, 128, 192, 256],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x0_5")
+    return model
+
 
 def osnet_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[16, 64, 96, 128], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x0_25")
-	return model
-
-def osnet_ibn_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ibn_x1_0")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[16, 64, 96, 128],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x0_25")
+    return model
+
+
+def osnet_ibn_x1_0(
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[64, 256, 384, 512],
+        loss=loss,
+        IN=True,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_ibn_x1_0")
+    return model
diff --git a/ethology/reid/backbones/osnet_ain.py b/ethology/reid/backbones/osnet_ain.py
index 9e052209..2ef3da25 100644
--- a/ethology/reid/backbones/osnet_ain.py
+++ b/ethology/reid/backbones/osnet_ain.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import warnings
 
@@ -8,349 +7,541 @@
 from torch import nn
 from torch.nn import functional as F
 
-__all__ = ["osnet_ain_x1_0", "osnet_ain_x0_75", "osnet_ain_x0_5", "osnet_ain_x0_25"]
+__all__ = [
+    "osnet_ain_x1_0",
+    "osnet_ain_x0_75",
+    "osnet_ain_x0_5",
+    "osnet_ain_x0_25",
+]
 
 pretrained_urls = {
-	"osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
-	"osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
-	"osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
-	"osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
+    "osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
+    "osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
+    "osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
+    "osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
 }
 
 # ...existing code for ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, LightConvStream, ChannelGate, OSBlock, OSBlockINin, OSNet, init_pretrained_weights, and instantiation functions...
 
+
 class ConvLayer(nn.Module):
-	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
-		super(ConvLayer, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups)
-		if IN:
-			self.bn = nn.InstanceNorm2d(out_channels, affine=True)
-		else:
-			self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(  # Conv2d -> norm -> ReLU building block
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        groups=1,
+        IN=False,  # True -> InstanceNorm2d(affine=True); else BatchNorm2d
+    ):
+        super(ConvLayer, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            bias=False,  # no conv bias; a norm layer follows
+            groups=groups,
+        )
+        if IN:  # the attribute is named "bn" for either norm type
+            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
+        else:
+            self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):  # applies conv, then norm, then ReLU
+        x = self.conv(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class Conv1x1(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv1x1, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv1x1, self).__init__()  # 1x1 conv -> BatchNorm2d -> ReLU
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            1,  # kernel size
+            stride=stride,
+            padding=0,
+            bias=False,  # no conv bias; BatchNorm2d follows
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):  # applies conv, then BN, then ReLU
+        x = self.conv(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class Conv1x1Linear(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, bn=True):
-		super(Conv1x1Linear, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
-		self.bn = None
-		if bn:
-			self.bn = nn.BatchNorm2d(out_channels)
-	def forward(self, x):
-		x = self.conv(x)
-		if self.bn is not None:
-			x = self.bn(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1, bn=True):
+        super(Conv1x1Linear, self).__init__()  # 1x1 conv, no activation
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
+        )
+        self.bn = None  # stays None when bn=False
+        if bn:
+            self.bn = nn.BatchNorm2d(out_channels)
+
+    def forward(self, x):  # conv, then BN only if one was created
+        x = self.conv(x)
+        if self.bn is not None:
+            x = self.bn(x)
+        return x
+
 
 class Conv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv3x3, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv3x3, self).__init__()  # 3x3 conv -> BatchNorm2d -> ReLU
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            3,  # kernel size
+            stride=stride,
+            padding=1,
+            bias=False,  # no conv bias; BatchNorm2d follows
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):  # applies conv, then BN, then ReLU
+        x = self.conv(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class LightConv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		super(LightConv3x3, self).__init__()
-		self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
-		self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv1(x)
-		x = self.conv2(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(self, in_channels, out_channels):
+        super(LightConv3x3, self).__init__()  # 1x1 conv + per-channel 3x3
+        self.conv1 = nn.Conv2d(  # 1x1 conv that changes the channel count
+            in_channels, out_channels, 1, stride=1, padding=0, bias=False
+        )
+        self.conv2 = nn.Conv2d(  # 3x3 conv with one group per channel
+            out_channels,
+            out_channels,
+            3,
+            stride=1,
+            padding=1,
+            bias=False,
+            groups=out_channels,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):  # conv1 -> conv2 -> BN -> ReLU
+        x = self.conv1(x)
+        x = self.conv2(x)  # no norm or activation between the two convs
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class LightConvStream(nn.Module):
-	def __init__(self, in_channels, out_channels, depth):
-		super(LightConvStream, self).__init__()
-		assert depth >= 1
-		layers = [LightConv3x3(in_channels, out_channels)]
-		for i in range(depth - 1):
-			layers.append(LightConv3x3(out_channels, out_channels))
-		self.layers = nn.Sequential(*layers)
-	def forward(self, x):
-		return self.layers(x)
+    def __init__(self, in_channels, out_channels, depth):
+        super(LightConvStream, self).__init__()  # chain of LightConv3x3
+        assert depth >= 1  # NOTE(review): asserts are stripped under -O
+        layers = [LightConv3x3(in_channels, out_channels)]
+        for i in range(depth - 1):  # loop index i is unused
+            layers.append(LightConv3x3(out_channels, out_channels))
+        self.layers = nn.Sequential(*layers)  # depth layers in total
+
+    def forward(self, x):  # runs x through the sequential stack
+        return self.layers(x)
+
 
 class ChannelGate(nn.Module):
-	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
-		super(ChannelGate, self).__init__()
-		if num_gates is None:
-			num_gates = in_channels
-		self.return_gates = return_gates
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0)
-		self.norm1 = None
-		if layer_norm:
-			self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
-		self.relu = nn.ReLU()
-		self.fc2 = nn.Conv2d(in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0)
-		if gate_activation == "sigmoid":
-			self.gate_activation = nn.Sigmoid()
-		elif gate_activation == "relu":
-			self.gate_activation = nn.ReLU()
-		elif gate_activation == "linear":
-			self.gate_activation = None
-		else:
-			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
-	def forward(self, x):
-		input = x
-		x = self.global_avgpool(x)
-		x = self.fc1(x)
-		if self.norm1 is not None:
-			x = self.norm1(x)
-		x = self.relu(x)
-		x = self.fc2(x)
-		if self.gate_activation is not None:
-			x = self.gate_activation(x)
-		if self.return_gates:
-			return x
-		return input * x
+    def __init__(  # per-channel gate computed from globally pooled input
+        self,
+        in_channels,
+        num_gates=None,  # None means one gate per input channel
+        return_gates=False,  # True: forward returns the gates themselves
+        gate_activation="sigmoid",  # "sigmoid", "relu" or "linear"
+        reduction=16,
+        layer_norm=False,
+    ):
+        super(ChannelGate, self).__init__()
+        if num_gates is None:
+            num_gates = in_channels
+        self.return_gates = return_gates
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc1 = nn.Conv2d(  # 1x1 conv down to in_channels // reduction
+            in_channels,
+            in_channels // reduction,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        self.norm1 = None  # replaced by a LayerNorm only when requested
+        if layer_norm:
+            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
+        self.relu = nn.ReLU()
+        self.fc2 = nn.Conv2d(  # 1x1 conv up to num_gates channels
+            in_channels // reduction,
+            num_gates,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        if gate_activation == "sigmoid":
+            self.gate_activation = nn.Sigmoid()
+        elif gate_activation == "relu":
+            self.gate_activation = nn.ReLU()
+        elif gate_activation == "linear":
+            self.gate_activation = None  # "linear" applies no activation
+        else:
+            raise RuntimeError(f"Unknown gate activation: {gate_activation}")
+
+    def forward(self, x):  # returns input * gates, or gates alone
+        input = x  # NOTE(review): shadows the builtin input()
+        x = self.global_avgpool(x)
+        x = self.fc1(x)
+        if self.norm1 is not None:
+            x = self.norm1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        if self.gate_activation is not None:
+            x = self.gate_activation(x)
+        if self.return_gates:
+            return x
+        return input * x  # scale the original input by the gates
+
 
 class OSBlock(nn.Module):
-	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
-		super(OSBlock, self).__init__()
-		assert T >= 1
-		assert out_channels >= reduction and out_channels % reduction == 0
-		mid_channels = out_channels // reduction
-		self.conv1 = Conv1x1(in_channels, mid_channels)
-		self.conv2 = nn.ModuleList([LightConvStream(mid_channels, mid_channels, t) for t in range(1, T + 1)])
-		self.gate = ChannelGate(mid_channels)
-		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
-		self.downsample = None
-		if in_channels != out_channels:
-			self.downsample = Conv1x1Linear(in_channels, out_channels)
-	def forward(self, x):
-		identity = x
-		x1 = self.conv1(x)
-		x2 = 0
-		for conv2_t in self.conv2:
-			x2_t = conv2_t(x1)
-			x2 = x2 + self.gate(x2_t)
-		x3 = self.conv3(x2)
-		if self.downsample is not None:
-			identity = self.downsample(identity)
-		out = x3 + identity
-		return F.relu(out)
+    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+        super(OSBlock, self).__init__()  # note: **kwargs is never read
+        assert T >= 1  # T is the number of parallel streams built below
+        assert out_channels >= reduction and out_channels % reduction == 0
+        mid_channels = out_channels // reduction
+        self.conv1 = Conv1x1(in_channels, mid_channels)
+        self.conv2 = nn.ModuleList(  # T streams with depths 1..T
+            [
+                LightConvStream(mid_channels, mid_channels, t)
+                for t in range(1, T + 1)
+            ]
+        )
+        self.gate = ChannelGate(mid_channels)  # one gate shared by streams
+        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+        self.downsample = None  # set only when channel counts differ
+        if in_channels != out_channels:
+            self.downsample = Conv1x1Linear(in_channels, out_channels)
+
+    def forward(self, x):  # gated multi-stream path plus a residual add
+        identity = x
+        x1 = self.conv1(x)
+        x2 = 0  # running sum of the gated stream outputs
+        for conv2_t in self.conv2:
+            x2_t = conv2_t(x1)  # every stream reads the same x1
+            x2 = x2 + self.gate(x2_t)
+        x3 = self.conv3(x2)
+        if self.downsample is not None:
+            identity = self.downsample(identity)
+        out = x3 + identity
+        return F.relu(out)
+
 
 class OSBlockINin(nn.Module):
-	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
-		super(OSBlockINin, self).__init__()
-		assert T >= 1
-		assert out_channels >= reduction and out_channels % reduction == 0
-		mid_channels = out_channels // reduction
-		self.conv1 = Conv1x1(in_channels, mid_channels)
-		self.conv2 = nn.ModuleList([LightConvStream(mid_channels, mid_channels, t) for t in range(1, T + 1)])
-		self.gate = ChannelGate(mid_channels)
-		self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
-		self.downsample = None
-		if in_channels != out_channels:
-			self.downsample = Conv1x1Linear(in_channels, out_channels)
-		self.IN = nn.InstanceNorm2d(out_channels, affine=True)
-	def forward(self, x):
-		identity = x
-		x1 = self.conv1(x)
-		x2 = 0
-		for conv2_t in self.conv2:
-			x2_t = conv2_t(x1)
-			x2 = x2 + self.gate(x2_t)
-		x3 = self.conv3(x2)
-		x3 = self.IN(x3)
-		if self.downsample is not None:
-			identity = self.downsample(identity)
-		out = x3 + identity
-		return F.relu(out)
+    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+        super(OSBlockINin, self).__init__()  # note: **kwargs is never read
+        assert T >= 1  # T is the number of parallel streams built below
+        assert out_channels >= reduction and out_channels % reduction == 0
+        mid_channels = out_channels // reduction
+        self.conv1 = Conv1x1(in_channels, mid_channels)
+        self.conv2 = nn.ModuleList(  # T streams with depths 1..T
+            [
+                LightConvStream(mid_channels, mid_channels, t)
+                for t in range(1, T + 1)
+            ]
+        )
+        self.gate = ChannelGate(mid_channels)  # one gate shared by streams
+        self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
+        self.downsample = None  # set only when channel counts differ
+        if in_channels != out_channels:
+            self.downsample = Conv1x1Linear(in_channels, out_channels)
+        self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+
+    def forward(self, x):  # like OSBlock.forward, plus IN before the add
+        identity = x
+        x1 = self.conv1(x)
+        x2 = 0  # running sum of the gated stream outputs
+        for conv2_t in self.conv2:
+            x2_t = conv2_t(x1)  # every stream reads the same x1
+            x2 = x2 + self.gate(x2_t)
+        x3 = self.conv3(x2)  # conv3 was built with bn=False
+        x3 = self.IN(x3)  # instance norm is applied before the residual add
+        if self.downsample is not None:
+            identity = self.downsample(identity)
+        out = x3 + identity
+        return F.relu(out)
+
 
 class OSNet(nn.Module):
-	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", conv1_IN=False, **kwargs):
-		super(OSNet, self).__init__()
-		num_blocks = len(blocks)
-		assert num_blocks == len(layers)
-		assert num_blocks == len(channels) - 1
-		self.loss = loss
-		self.feature_dim = feature_dim
-		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=conv1_IN)
-		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1])
-		self.pool2 = nn.Sequential(Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2))
-		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2])
-		self.pool3 = nn.Sequential(Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2))
-		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3])
-		self.conv5 = Conv1x1(channels[3], channels[3])
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-		self._init_params()
-	def _make_layer(self, blocks, layer, in_channels, out_channels):
-		layers = []
-		layers += [blocks[0](in_channels, out_channels)]
-		for i in range(1, len(blocks)):
-			layers += [blocks[i](out_channels, out_channels)]
-		return nn.Sequential(*layers)
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		if fc_dims is None or fc_dims < 0:
-			self.feature_dim = input_dim
-			return None
-		if isinstance(fc_dims, int):
-			fc_dims = [fc_dims]
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU())
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-		self.feature_dim = fc_dims[-1]
-		return nn.Sequential(*layers)
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.InstanceNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.maxpool(x)
-		x = self.conv2(x)
-		x = self.pool2(x)
-		x = self.conv3(x)
-		x = self.pool3(x)
-		x = self.conv4(x)
-		x = self.conv5(x)
-		return x
-	def forward(self, x, return_featuremaps=False):
-		x = self.featuremaps(x)
-		if return_featuremaps:
-			return x
-		v = self.global_avgpool(x)
-		v = v.view(v.size(0), -1)
-		if self.fc is not None:
-			v = self.fc(v)
-		if not self.training:
-			return v
-		y = self.classifier(v)
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(  # builds conv1..conv5, pooling, FC head and classifier
+        self,
+        num_classes,
+        blocks,  # one sequence of block classes per stage (see _make_layer)
+        layers,
+        channels,  # channels[0] is conv1's width; [1..3] are conv2..conv4
+        feature_dim=512,
+        loss="softmax",  # forward() handles "softmax" and "triplet"
+        conv1_IN=False,
+        **kwargs,  # accepted but not read in this constructor
+    ):
+        super(OSNet, self).__init__()
+        num_blocks = len(blocks)
+        assert num_blocks == len(layers)
+        assert num_blocks == len(channels) - 1
+        self.loss = loss
+        self.feature_dim = feature_dim
+        self.conv1 = ConvLayer(  # 7x7 stride-2 conv on a 3-channel input
+            3, channels[0], 7, stride=2, padding=3, IN=conv1_IN
+        )
+        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+        self.conv2 = self._make_layer(
+            blocks[0], layers[0], channels[0], channels[1]
+        )
+        self.pool2 = nn.Sequential(  # 1x1 conv then 2x2 average pooling
+            Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2)
+        )
+        self.conv3 = self._make_layer(
+            blocks[1], layers[1], channels[1], channels[2]
+        )
+        self.pool3 = nn.Sequential(  # 1x1 conv then 2x2 average pooling
+            Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2)
+        )
+        self.conv4 = self._make_layer(  # no pooling follows this stage
+            blocks[2], layers[2], channels[2], channels[3]
+        )
+        self.conv5 = Conv1x1(channels[3], channels[3])
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc = self._construct_fc_layer(  # also sets self.feature_dim
+            self.feature_dim, channels[3], dropout_p=None
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+        self._init_params()
+
+    def _make_layer(self, blocks, layer, in_channels, out_channels):
+        layers = []  # NOTE(review): the `layer` argument is never read
+        layers += [blocks[0](in_channels, out_channels)]
+        for i in range(1, len(blocks)):  # remaining blocks keep the width
+            layers += [blocks[i](out_channels, out_channels)]
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        if fc_dims is None or fc_dims < 0:  # no FC head requested
+            self.feature_dim = input_dim
+            return None
+        if isinstance(fc_dims, int):
+            fc_dims = [fc_dims]  # a single int becomes a one-layer head
+        layers = []
+        for dim in fc_dims:  # Linear -> BatchNorm1d -> ReLU [-> Dropout]
+            layers.append(nn.Linear(input_dim, dim))
+            layers.append(nn.BatchNorm1d(dim))
+            layers.append(nn.ReLU())
+            if dropout_p is not None:
+                layers.append(nn.Dropout(p=dropout_p))
+            input_dim = dim
+        self.feature_dim = fc_dims[-1]  # width of the last FC layer
+        return nn.Sequential(*layers)
+
+    def _init_params(self):  # initialises every submodule by its type
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif (  # all norm layers: weight set to 1, bias set to 0
+                isinstance(m, nn.BatchNorm2d)
+                or isinstance(m, nn.BatchNorm1d)
+                or isinstance(m, nn.InstanceNorm2d)
+            ):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)  # mean 0, std 0.01
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def featuremaps(self, x):  # runs conv1 through conv5, before pooling
+        x = self.conv1(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.pool2(x)
+        x = self.conv3(x)
+        x = self.pool3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        return x
+
+    def forward(self, x, return_featuremaps=False):
+        x = self.featuremaps(x)
+        if return_featuremaps:  # early exit with the conv5 output
+            return x
+        v = self.global_avgpool(x)
+        v = v.view(v.size(0), -1)  # flatten everything except dim 0
+        if self.fc is not None:
+            v = self.fc(v)
+        if not self.training:  # eval mode returns the feature vector v
+            return v
+        y = self.classifier(v)
+        if self.loss == "softmax":
+            return y  # classifier output only
+        elif self.loss == "triplet":
+            return y, v  # classifier output and the feature vector
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
+
 
 def init_pretrained_weights(model, key=""):
-	import errno
-	import os
-	from collections import OrderedDict
-	import gdown
-	def _get_torch_home():
-		ENV_TORCH_HOME = "TORCH_HOME"
-		ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
-		DEFAULT_CACHE_DIR = "~/.cache"
-		torch_home = os.path.expanduser(
-			os.getenv(
-				ENV_TORCH_HOME,
-				os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"),
-			)
-		)
-		return torch_home
-	torch_home = _get_torch_home()
-	model_dir = os.path.join(torch_home, "checkpoints")
-	try:
-		os.makedirs(model_dir)
-	except OSError as e:
-		if e.errno == errno.EEXIST:
-			pass
-		else:
-			raise
-	filename = key + "_imagenet.pth"
-	cached_file = os.path.join(model_dir, filename)
-	if not os.path.exists(cached_file):
-		gdown.download(pretrained_urls[key], cached_file, quiet=False)
-	state_dict = torch.load(cached_file)
-	model_dict = model.state_dict()
-	new_state_dict = OrderedDict()
-	matched_layers, discarded_layers = [], []
-	for k, v in state_dict.items():
-		if k.startswith("module."):
-			k = k[7:]
-		if k in model_dict and model_dict[k].size() == v.size():
-			new_state_dict[k] = v
-			matched_layers.append(k)
-		else:
-			discarded_layers.append(k)
-	model_dict.update(new_state_dict)
-	model.load_state_dict(model_dict)
-	if len(matched_layers) == 0:
-		warnings.warn(
-			'The pretrained weights from "{}" cannot be loaded, '
-			"please check the key names manually "
-			"(** ignored and continue **)".format(cached_file)
-		)
-	else:
-		print(
-			'Successfully loaded imagenet pretrained weights from "{}"'.format(
-				cached_file
-			)
-		)
-		if len(discarded_layers) > 0:
-			print(
-				"** The following layers are discarded "
-				"due to unmatched keys or layer size: {}".format(discarded_layers)
-			)
-
-def osnet_ain_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x1_0")
-	return model
-
-def osnet_ain_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[48, 192, 288, 384], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x0_75")
-	return model
-
-def osnet_ain_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[32, 128, 192, 256], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x0_5")
-	return model
-
-def osnet_ain_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[16, 64, 96, 128], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x0_25")
-	return model
+    """Download (if not cached) and load pretrained weights into ``model``.
+
+    Only checkpoint entries whose name and size match ``model`` are used.
+    """
+    import os
+    from collections import OrderedDict
+
+    import gdown
+
+    def _get_torch_home():
+        ENV_TORCH_HOME = "TORCH_HOME"
+        ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
+        DEFAULT_CACHE_DIR = "~/.cache"
+        torch_home = os.path.expanduser(
+            os.getenv(
+                ENV_TORCH_HOME,
+                os.path.join(
+                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"
+                ),
+            )
+        )
+        return torch_home
+
+    torch_home = _get_torch_home()
+    model_dir = os.path.join(torch_home, "checkpoints")
+    # exist_ok=True replaces the manual errno.EEXIST check.
+    os.makedirs(model_dir, exist_ok=True)
+    filename = key + "_imagenet.pth"
+    cached_file = os.path.join(model_dir, filename)
+    if not os.path.exists(cached_file):
+        gdown.download(pretrained_urls[key], cached_file, quiet=False)
+    # NOTE(review): torch.load unpickles a downloaded file; trust the source.
+    # Map to CPU so a GPU-saved checkpoint also loads on a CPU-only host.
+    state_dict = torch.load(cached_file, map_location="cpu")
+    model_dict = model.state_dict()
+    new_state_dict = OrderedDict()
+    matched_layers, discarded_layers = [], []
+    for k, v in state_dict.items():
+        if k.startswith("module."):
+            k = k[7:]  # drop the leading "module." (7 characters)
+        if k in model_dict and model_dict[k].size() == v.size():
+            new_state_dict[k] = v
+            matched_layers.append(k)
+        else:
+            discarded_layers.append(k)
+    model_dict.update(new_state_dict)  # unmatched keys keep model values
+    model.load_state_dict(model_dict)
+    if not matched_layers:
+        warnings.warn(
+            f'The pretrained weights from "{cached_file}" cannot be loaded, '
+            "please check the key names manually "
+            "(** ignored and continue **)"
+        )
+    else:
+        print(
+            f'Successfully loaded imagenet pretrained weights from "{cached_file}"'
+        )
+        if discarded_layers:
+            print(
+                "** The following layers are discarded "
+                f"due to unmatched keys or layer size: {discarded_layers}"
+            )
+
+
+def osnet_ain_x1_0(  # builds the x1_0 OSNet, optionally pretrained
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],  # block classes for conv2
+            [OSBlock, OSBlockINin],  # block classes for conv3
+            [OSBlockINin, OSBlock],  # block classes for conv4
+        ],
+        layers=[2, 2, 2],  # forwarded to _make_layer, which ignores it
+        channels=[64, 256, 384, 512],
+        loss=loss,
+        conv1_IN=True,  # conv1 uses InstanceNorm2d instead of BatchNorm2d
+        **kwargs,
+    )
+    if pretrained:  # downloads the checkpoint via gdown if not cached
+        init_pretrained_weights(model, key="osnet_ain_x1_0")
+    return model
+
+
+def osnet_ain_x0_75(  # builds the x0_75 OSNet, optionally pretrained
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],  # block classes for conv2
+            [OSBlock, OSBlockINin],  # block classes for conv3
+            [OSBlockINin, OSBlock],  # block classes for conv4
+        ],
+        layers=[2, 2, 2],  # forwarded to _make_layer, which ignores it
+        channels=[48, 192, 288, 384],  # 3/4 of osnet_ain_x1_0's widths
+        loss=loss,
+        conv1_IN=True,  # conv1 uses InstanceNorm2d instead of BatchNorm2d
+        **kwargs,
+    )
+    if pretrained:  # downloads the checkpoint via gdown if not cached
+        init_pretrained_weights(model, key="osnet_ain_x0_75")
+    return model
+
+
+def osnet_ain_x0_5(  # builds the x0_5 OSNet, optionally pretrained
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],  # block classes for conv2
+            [OSBlock, OSBlockINin],  # block classes for conv3
+            [OSBlockINin, OSBlock],  # block classes for conv4
+        ],
+        layers=[2, 2, 2],  # forwarded to _make_layer, which ignores it
+        channels=[32, 128, 192, 256],  # 1/2 of osnet_ain_x1_0's widths
+        loss=loss,
+        conv1_IN=True,  # conv1 uses InstanceNorm2d instead of BatchNorm2d
+        **kwargs,
+    )
+    if pretrained:  # downloads the checkpoint via gdown if not cached
+        init_pretrained_weights(model, key="osnet_ain_x0_5")
+    return model
+
+
+def osnet_ain_x0_25(  # builds the x0_25 OSNet, optionally pretrained
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],  # block classes for conv2
+            [OSBlock, OSBlockINin],  # block classes for conv3
+            [OSBlockINin, OSBlock],  # block classes for conv4
+        ],
+        layers=[2, 2, 2],  # forwarded to _make_layer, which ignores it
+        channels=[16, 64, 96, 128],  # 1/4 of osnet_ain_x1_0's widths
+        loss=loss,
+        conv1_IN=True,  # conv1 uses InstanceNorm2d instead of BatchNorm2d
+        **kwargs,
+    )
+    if pretrained:  # downloads the checkpoint via gdown if not cached
+        init_pretrained_weights(model, key="osnet_ain_x0_25")
+    return model
diff --git a/ethology/reid/backbones/resnet.py b/ethology/reid/backbones/resnet.py
index 12ca5639..80eda98e 100644
--- a/ethology/reid/backbones/resnet.py
+++ b/ethology/reid/backbones/resnet.py
@@ -1,273 +1,461 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-"""
-Code source: https://github.com/pytorch/vision
-"""
-from __future__ import absolute_import, division
+"""Code source: https://github.com/pytorch/vision"""
 
 import torch.utils.model_zoo as model_zoo
 from torch import nn
 
 __all__ = [
-	"resnet18",
-	"resnet34",
-	"resnet50",
-	"resnet101",
-	"resnet152",
-	"resnext50_32x4d",
-	"resnext101_32x8d",
-	"resnet50_fc512",
+    "resnet18",
+    "resnet34",
+    "resnet50",
+    "resnet101",
+    "resnet152",
+    "resnext50_32x4d",
+    "resnext101_32x8d",
+    "resnet50_fc512",
 ]
 
 model_urls = {
-	"resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
-	"resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
-	"resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
-	"resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
-	"resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
-	"resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
-	"resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
+    "resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
+    "resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
+    "resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
+    "resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
+    "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
+    "resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
+    "resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
 }
 
 # ...existing code for conv3x3, conv1x1, BasicBlock, Bottleneck, ResNet, init_pretrained_weights, and instantiation functions...
 
+
 def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
-	return nn.Conv2d(
-		in_planes,
-		out_planes,
-		kernel_size=3,
-		stride=stride,
-		padding=dilation,
-		groups=groups,
-		bias=False,
-		dilation=dilation,
-	)
+    return nn.Conv2d(
+        in_planes,
+        out_planes,
+        kernel_size=3,
+        stride=stride,
+        padding=dilation,
+        groups=groups,
+        bias=False,
+        dilation=dilation,
+    )
+
 
 def conv1x1(in_planes, out_planes, stride=1):
-	return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+    return nn.Conv2d(
+        in_planes, out_planes, kernel_size=1, stride=stride, bias=False
+    )
+
 
 class BasicBlock(nn.Module):
-	expansion = 1
-	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
-		super(BasicBlock, self).__init__()
-		if norm_layer is None:
-			norm_layer = nn.BatchNorm2d
-		if groups != 1 or base_width != 64:
-			raise ValueError("BasicBlock only supports groups=1 and base_width=64")
-		if dilation > 1:
-			raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
-		self.conv1 = conv3x3(inplanes, planes, stride)
-		self.bn1 = norm_layer(planes)
-		self.relu = nn.ReLU(inplace=True)
-		self.conv2 = conv3x3(planes, planes)
-		self.bn2 = norm_layer(planes)
-		self.downsample = downsample
-		self.stride = stride
-	def forward(self, x):
-		identity = x
-		out = self.conv1(x)
-		out = self.bn1(out)
-		out = self.relu(out)
-		out = self.conv2(out)
-		out = self.bn2(out)
-		if self.downsample is not None:
-			identity = self.downsample(x)
-		out += identity
-		out = self.relu(out)
-		return out
+    expansion = 1
+
+    def __init__(
+        self,
+        inplanes,
+        planes,
+        stride=1,
+        downsample=None,
+        groups=1,
+        base_width=64,
+        dilation=1,
+        norm_layer=None,
+    ):
+        super(BasicBlock, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d  # default normalisation layer
+        if groups != 1 or base_width != 64:  # only the default width settings are supported
+            raise ValueError(
+                "BasicBlock only supports groups=1 and base_width=64"
+            )
+        if dilation > 1:
+            raise NotImplementedError(
+                "Dilation > 1 not supported in BasicBlock"
+            )
+        self.conv1 = conv3x3(inplanes, planes, stride)  # the only conv that applies the stride
+        self.bn1 = norm_layer(planes)
+        self.relu = nn.ReLU(inplace=True)  # one module, reused for both activations in forward()
+        self.conv2 = conv3x3(planes, planes)
+        self.bn2 = norm_layer(planes)
+        self.downsample = downsample  # optional module applied to the input before the residual add
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x  # shortcut branch; replaced below when a downsample module exists
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)  # no activation here: ReLU is applied after the add
+        if self.downsample is not None:
+            identity = self.downsample(x)  # computed from the block input, not from `out`
+        out += identity  # in-place residual addition
+        out = self.relu(out)
+        return out
+
 
 class Bottleneck(nn.Module):
-	expansion = 4
-	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
-		super(Bottleneck, self).__init__()
-		if norm_layer is None:
-			norm_layer = nn.BatchNorm2d
-		width = int(planes * (base_width / 64.0)) * groups
-		self.conv1 = conv1x1(inplanes, width)
-		self.bn1 = norm_layer(width)
-		self.conv2 = conv3x3(width, width, stride, groups, dilation)
-		self.bn2 = norm_layer(width)
-		self.conv3 = conv1x1(width, planes * self.expansion)
-		self.bn3 = norm_layer(planes * self.expansion)
-		self.relu = nn.ReLU(inplace=True)
-		self.downsample = downsample
-		self.stride = stride
-	def forward(self, x):
-		identity = x
-		out = self.conv1(x)
-		out = self.bn1(out)
-		out = self.relu(out)
-		out = self.conv2(out)
-		out = self.bn2(out)
-		out = self.relu(out)
-		out = self.conv3(out)
-		out = self.bn3(out)
-		if self.downsample is not None:
-			identity = self.downsample(x)
-		out += identity
-		out = self.relu(out)
-		return out
+    expansion = 4
+
+    def __init__(
+        self,
+        inplanes,
+        planes,
+        stride=1,
+        downsample=None,
+        groups=1,
+        base_width=64,
+        dilation=1,
+        norm_layer=None,
+    ):
+        super(Bottleneck, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d  # default normalisation layer
+        width = int(planes * (base_width / 64.0)) * groups  # channels of the inner 3x3 conv
+        self.conv1 = conv1x1(inplanes, width)
+        self.bn1 = norm_layer(width)
+        self.conv2 = conv3x3(width, width, stride, groups, dilation)  # stride/groups/dilation live here
+        self.bn2 = norm_layer(width)
+        self.conv3 = conv1x1(width, planes * self.expansion)  # output has planes * 4 channels
+        self.bn3 = norm_layer(planes * self.expansion)
+        self.relu = nn.ReLU(inplace=True)  # one module, reused for all activations in forward()
+        self.downsample = downsample  # optional module applied to the input before the residual add
+        self.stride = stride
+
+    def forward(self, x):
+        identity = x  # shortcut branch; replaced below when a downsample module exists
+        out = self.conv1(x)
+        out = self.bn1(out)
+        out = self.relu(out)
+        out = self.conv2(out)
+        out = self.bn2(out)
+        out = self.relu(out)
+        out = self.conv3(out)
+        out = self.bn3(out)  # no activation here: ReLU is applied after the add
+        if self.downsample is not None:
+            identity = self.downsample(x)  # computed from the block input, not from `out`
+        out += identity  # in-place residual addition
+        out = self.relu(out)
+        return out
+
 
 class ResNet(nn.Module):
-	def __init__(self, num_classes, loss, block, layers, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None, last_stride=2, fc_dims=None, dropout_p=None, **kwargs):
-		super(ResNet, self).__init__()
-		if norm_layer is None:
-			norm_layer = nn.BatchNorm2d
-		self._norm_layer = norm_layer
-		self.loss = loss
-		self.feature_dim = 512 * block.expansion
-		self.inplanes = 64
-		self.dilation = 1
-		if replace_stride_with_dilation is None:
-			replace_stride_with_dilation = [False, False, False]
-		if len(replace_stride_with_dilation) != 3:
-			raise ValueError("replace_stride_with_dilation should be None or a 3-element tuple, got {}".format(replace_stride_with_dilation))
-		self.groups = groups
-		self.base_width = width_per_group
-		self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
-		self.bn1 = norm_layer(self.inplanes)
-		self.relu = nn.ReLU(inplace=True)
-		self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
-		self.layer1 = self._make_layer(block, 64, layers[0])
-		self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
-		self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
-		self.layer4 = self._make_layer(block, 512, layers[3], stride=last_stride, dilate=replace_stride_with_dilation[2])
-		self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
-		self.fc = self._construct_fc_layer(fc_dims, 512 * block.expansion, dropout_p)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-		self._init_params()
-		if zero_init_residual:
-			for m in self.modules():
-				if isinstance(m, Bottleneck):
-					nn.init.constant_(m.bn3.weight, 0)
-				elif isinstance(m, BasicBlock):
-					nn.init.constant_(m.bn2.weight, 0)
-	def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
-		norm_layer = self._norm_layer
-		downsample = None
-		previous_dilation = self.dilation
-		if dilate:
-			self.dilation *= stride
-			stride = 1
-		if stride != 1 or self.inplanes != planes * block.expansion:
-			downsample = nn.Sequential(
-				conv1x1(self.inplanes, planes * block.expansion, stride),
-				norm_layer(planes * block.expansion),
-			)
-		layers = []
-		layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer))
-		self.inplanes = planes * block.expansion
-		for _ in range(1, blocks):
-			layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer))
-		return nn.Sequential(*layers)
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		if fc_dims is None:
-			self.feature_dim = input_dim
-			return None
-		assert isinstance(fc_dims, (list, tuple)), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU(inplace=True))
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-		self.feature_dim = fc_dims[-1]
-		return nn.Sequential(*layers)
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.bn1(x)
-		x = self.relu(x)
-		x = self.maxpool(x)
-		x = self.layer1(x)
-		x = self.layer2(x)
-		x = self.layer3(x)
-		x = self.layer4(x)
-		return x
-	def forward(self, x):
-		f = self.featuremaps(x)
-		v = self.global_avgpool(f)
-		v = v.view(v.size(0), -1)
-		if self.fc is not None:
-			v = self.fc(v)
-		if not self.training:
-			return v
-		y = self.classifier(v)
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(
+        self,
+        num_classes,
+        loss,
+        block,
+        layers,
+        zero_init_residual=False,
+        groups=1,
+        width_per_group=64,
+        replace_stride_with_dilation=None,
+        norm_layer=None,
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,  # accepted but never read in this constructor
+    ):
+        super(ResNet, self).__init__()
+        if norm_layer is None:
+            norm_layer = nn.BatchNorm2d
+        self._norm_layer = norm_layer
+        self.loss = loss  # read by forward() in training mode
+        self.feature_dim = 512 * block.expansion  # reassigned by _construct_fc_layer below
+        self.inplanes = 64  # running input width, updated by _make_layer
+        self.dilation = 1  # running dilation, updated by _make_layer when dilate=True
+        if replace_stride_with_dilation is None:
+            replace_stride_with_dilation = [False, False, False]  # flags for layer2, layer3, layer4
+        if len(replace_stride_with_dilation) != 3:
+            raise ValueError(
+                f"replace_stride_with_dilation should be None or a 3-element tuple, got {replace_stride_with_dilation}"
+            )
+        self.groups = groups
+        self.base_width = width_per_group
+        self.conv1 = nn.Conv2d(
+            3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False
+        )
+        self.bn1 = norm_layer(self.inplanes)
+        self.relu = nn.ReLU(inplace=True)
+        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+        self.layer1 = self._make_layer(block, 64, layers[0])
+        self.layer2 = self._make_layer(
+            block,
+            128,
+            layers[1],
+            stride=2,
+            dilate=replace_stride_with_dilation[0],
+        )
+        self.layer3 = self._make_layer(
+            block,
+            256,
+            layers[2],
+            stride=2,
+            dilate=replace_stride_with_dilation[1],
+        )
+        self.layer4 = self._make_layer(
+            block,
+            512,
+            layers[3],
+            stride=last_stride,  # the only stage whose stride is configurable
+            dilate=replace_stride_with_dilation[2],
+        )
+        self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
+        self.fc = self._construct_fc_layer(  # None when fc_dims is None
+            fc_dims, 512 * block.expansion, dropout_p
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+        self._init_params()
+        if zero_init_residual:  # zero the weight of the last norm layer in every residual block
+            for m in self.modules():
+                if isinstance(m, Bottleneck):
+                    nn.init.constant_(m.bn3.weight, 0)
+                elif isinstance(m, BasicBlock):
+                    nn.init.constant_(m.bn2.weight, 0)
+
+    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+        norm_layer = self._norm_layer
+        downsample = None
+        previous_dilation = self.dilation  # dilation in effect before this stage
+        if dilate:  # fold the requested stride into the dilation instead
+            self.dilation *= stride
+            stride = 1
+        if stride != 1 or self.inplanes != planes * block.expansion:
+            downsample = nn.Sequential(  # 1x1 conv + norm so the shortcut matches the block output
+                conv1x1(self.inplanes, planes * block.expansion, stride),
+                norm_layer(planes * block.expansion),
+            )
+        layers = []
+        layers.append(  # first block: carries the stride, the downsample and the previous dilation
+            block(
+                self.inplanes,
+                planes,
+                stride,
+                downsample,
+                self.groups,
+                self.base_width,
+                previous_dilation,
+                norm_layer,
+            )
+        )
+        self.inplanes = planes * block.expansion  # later blocks (and the next stage) start from this width
+        for _ in range(1, blocks):  # remaining blocks: default stride, no downsample, updated dilation
+            layers.append(
+                block(
+                    self.inplanes,
+                    planes,
+                    groups=self.groups,
+                    base_width=self.base_width,
+                    dilation=self.dilation,
+                    norm_layer=norm_layer,
+                )
+            )
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        """Build the optional FC embedding head and record ``feature_dim``."""
+        if fc_dims is None:
+            self.feature_dim = input_dim
+            return None
+        assert isinstance(fc_dims, (list, tuple)), (
+            f"fc_dims must be either list or tuple, but got {type(fc_dims)}"
+        )
+        head, width = [], input_dim
+        for dim in fc_dims:
+            head += [nn.Linear(width, dim), nn.BatchNorm1d(dim)]
+            head.append(nn.ReLU(inplace=True))
+            if dropout_p is not None:
+                head.append(nn.Dropout(p=dropout_p))
+            width = dim
+        self.feature_dim = fc_dims[-1]
+        return nn.Sequential(*head)
+
+    def _init_params(self):
+        """Initialise conv, batch-norm and linear parameters in place."""
+        for module in self.modules():
+            if isinstance(module, nn.Conv2d):
+                # Kaiming-normal weights, fan-out mode, ReLU gain.
+                nn.init.kaiming_normal_(
+                    module.weight, mode="fan_out", nonlinearity="relu"
+                )
+            elif isinstance(module, (nn.BatchNorm2d, nn.BatchNorm1d)):
+                nn.init.constant_(module.weight, 1)
+            elif isinstance(module, nn.Linear):
+                nn.init.normal_(module.weight, 0, 0.01)
+            else:
+                continue  # every other module keeps its default init
+            # All three handled kinds get a zero bias when they have one.
+            if module.bias is not None:
+                nn.init.constant_(module.bias, 0)
+
+    def featuremaps(self, x):  # convolutional trunk only: no pooling, fc or classifier
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.relu(x)
+        x = self.maxpool(x)
+        x = self.layer1(x)
+        x = self.layer2(x)
+        x = self.layer3(x)
+        x = self.layer4(x)
+        return x
+
+    def forward(self, x):
+        f = self.featuremaps(x)
+        v = self.global_avgpool(f)
+        v = v.view(v.size(0), -1)  # flatten everything after dim 0
+        if self.fc is not None:
+            v = self.fc(v)
+        if not self.training:  # eval mode: return the embedding, skip the classifier
+            return v
+        y = self.classifier(v)
+        if self.loss == "softmax":
+            return y  # classifier output only
+        elif self.loss == "triplet":
+            return y, v  # classifier output plus the embedding
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
+
 
 def init_pretrained_weights(model, model_url):
-	pretrain_dict = model_zoo.load_url(model_url)
-	model_dict = model.state_dict()
-	pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
-	model_dict.update(pretrain_dict)
-	model.load_state_dict(model_dict)
+    pretrain_dict = model_zoo.load_url(model_url)
+    model_dict = model.state_dict()
+    pretrain_dict = {
+        k: v
+        for k, v in pretrain_dict.items()
+        if k in model_dict and model_dict[k].size() == v.size()
+    }
+    model_dict.update(pretrain_dict)
+    model.load_state_dict(model_dict)
+
 
 def resnet18(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=BasicBlock, layers=[2, 2, 2, 2], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnet18"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=BasicBlock,
+        layers=[2, 2, 2, 2],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnet18"])
+    return model
+
 
 def resnet34(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=BasicBlock, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnet34"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=BasicBlock,
+        layers=[3, 4, 6, 3],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnet34"])
+    return model
+
 
 def resnet50(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnet50"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=Bottleneck,
+        layers=[3, 4, 6, 3],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnet50"])
+    return model
+
 
 def resnet101(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 23, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnet101"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=Bottleneck,
+        layers=[3, 4, 23, 3],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnet101"])
+    return model
+
 
 def resnet152(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 8, 36, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnet152"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=Bottleneck,
+        layers=[3, 8, 36, 3],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnet152"])
+    return model
+
 
 def resnext50_32x4d(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, groups=32, width_per_group=4, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnext50_32x4d"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=Bottleneck,
+        layers=[3, 4, 6, 3],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        groups=32,
+        width_per_group=4,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnext50_32x4d"])
+    return model
+
 
 def resnext101_32x8d(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 23, 3], last_stride=2, fc_dims=None, dropout_p=None, groups=32, width_per_group=8, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnext101_32x8d"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=Bottleneck,
+        layers=[3, 4, 23, 3],
+        last_stride=2,
+        fc_dims=None,
+        dropout_p=None,
+        groups=32,
+        width_per_group=8,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnext101_32x8d"])
+    return model
+
 
 def resnet50_fc512(num_classes, loss="softmax", pretrained=True, **kwargs):
-	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=1, fc_dims=[512], dropout_p=None, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, model_urls["resnet50"])
-	return model
+    model = ResNet(
+        num_classes=num_classes,
+        loss=loss,
+        block=Bottleneck,
+        layers=[3, 4, 6, 3],
+        last_stride=1,
+        fc_dims=[512],
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, model_urls["resnet50"])
+    return model
diff --git a/ethology/reid/backends/base_backend.py b/ethology/reid/backends/base_backend.py
index 688ec43a..0edb2826 100644
--- a/ethology/reid/backends/base_backend.py
+++ b/ethology/reid/backends/base_backend.py
@@ -1,18 +1,19 @@
-
-import os
 from abc import abstractmethod
 from pathlib import Path
+
 import cv2
 import gdown
 import numpy as np
 import torch
 from filelock import SoftFileLock
+
 from ethology.reid.core.registry import ReIDModelRegistry
+
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 # from ethology.utils.checks import RequirementsChecker  # If needed, implement or set RequirementsChecker
 
-class BaseModelBackend:
 
+class BaseModelBackend:
     def __init__(self, weights, device, half):
         self.weights = weights[0] if isinstance(weights, list) else weights
         if isinstance(self.weights, str):
@@ -22,7 +23,7 @@ def __init__(self, weights, device, half):
         self.half = half
         self.model = None
         # Support both string and torch.device for device
-        if hasattr(self.device, 'type'):
+        if hasattr(self.device, "type"):
             self.cuda = torch.cuda.is_available() and self.device.type != "cpu"
         else:
             self.cuda = torch.cuda.is_available() and self.device != "cpu"
@@ -41,11 +42,19 @@ def __init__(self, weights, device, half):
 
         self.load_model(self.weights)
 
-        self.mean_array = torch.tensor([0.485, 0.456, 0.406], device=self.device).view(1, 3, 1, 1)
-        self.std_array = torch.tensor([0.229, 0.224, 0.225], device=self.device).view(1, 3, 1, 1)
+        self.mean_array = torch.tensor(
+            [0.485, 0.456, 0.406], device=self.device
+        ).view(1, 3, 1, 1)
+        self.std_array = torch.tensor(
+            [0.229, 0.224, 0.225], device=self.device
+        ).view(1, 3, 1, 1)
         if "clip" in self.model_name:
-            self.mean_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
-            self.std_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
+            self.mean_array = torch.tensor(
+                [0.5, 0.5, 0.5], device=self.device
+            ).view(1, 3, 1, 1)
+            self.std_array = torch.tensor(
+                [0.5, 0.5, 0.5], device=self.device
+            ).view(1, 3, 1, 1)
 
         if "vehicleid" in self.weights.name or "veri" in self.weights.name:
             input_shape = (256, 256)
@@ -57,7 +66,6 @@ def __init__(self, weights, device, half):
             input_shape = (256, 128)
         self.input_shape = input_shape
 
-
     def get_crops(self, xyxys, img):
         h, w = img.shape[:2]
         interpolation_method = cv2.INTER_LINEAR
@@ -85,7 +93,6 @@ def get_crops(self, xyxys, img):
         crops = (crops - self.mean_array) / self.std_array
         return crops
 
-
     @torch.no_grad()
     def get_features(self, xyxys, img):
         if xyxys.size != 0:
@@ -98,7 +105,6 @@ def get_features(self, xyxys, img):
         features = features / np.linalg.norm(features, axis=-1, keepdims=True)
         return features
 
-
     def warmup(self, imgsz=[(256, 128, 3)]):
         if self.device.type != "cpu":
             im = np.random.randint(0, 255, *imgsz, dtype=np.uint8)
@@ -108,11 +114,9 @@ def warmup(self, imgsz=[(256, 128, 3)]):
             crops = self.inference_preprocess(crops)
             self.forward(crops)
 
-
     def to_numpy(self, x):
         return x.cpu().numpy() if isinstance(x, torch.Tensor) else x
 
-
     def inference_preprocess(self, x):
         if self.half:
             if isinstance(x, torch.Tensor):
@@ -121,32 +125,34 @@ def inference_preprocess(self, x):
             elif isinstance(x, np.ndarray):
                 if x.dtype != np.float16:
                     x = x.astype(np.float16)
-        if hasattr(self, 'nhwc') and self.nhwc:
+        if hasattr(self, "nhwc") and self.nhwc:
             if isinstance(x, torch.Tensor):
                 x = x.permute(0, 2, 3, 1)
             elif isinstance(x, np.ndarray):
                 x = np.transpose(x, (0, 2, 3, 1))
         return x
 
-
     def inference_postprocess(self, features):
         if isinstance(features, (list, tuple)):
             return (
-                self.to_numpy(features[0]) if len(features) == 1 else [self.to_numpy(x) for x in features]
+                self.to_numpy(features[0])
+                if len(features) == 1
+                else [self.to_numpy(x) for x in features]
             )
         else:
             return self.to_numpy(features)
 
-
     @abstractmethod
     def forward(self, im_batch):
-        raise NotImplementedError("This method should be implemented by subclasses.")
-
+        raise NotImplementedError(
+            "This method should be implemented by subclasses."
+        )
 
     @abstractmethod
     def load_model(self, w):
-        raise NotImplementedError("This method should be implemented by subclasses.")
-
+        raise NotImplementedError(
+            "This method should be implemented by subclasses."
+        )
 
     def download_model(self, w):
         if isinstance(w, str):
diff --git a/ethology/reid/backends/onnx_backend.py b/ethology/reid/backends/onnx_backend.py
index c7c93017..41aefb7f 100644
--- a/ethology/reid/backends/onnx_backend.py
+++ b/ethology/reid/backends/onnx_backend.py
@@ -1,31 +1,34 @@
-
 from ethology.reid.backends.base_backend import BaseModelBackend
 
+
 class ONNXBackend(BaseModelBackend):
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)  # the base constructor calls load_model()
+        self.nhwc = False  # inference_preprocess permutes to NHWC only when this is truthy
+        self.half = half
+
+    def load_model(self, w):
+        # ONNXRuntime will attempt to use the first provider, and if it fails or is not
+        # available for some reason, it will fall back to the next provider in the list
+        if self.device.type == "mps":
+            # self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
+            providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
+        elif self.device.type == "cuda":
+            # self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
+            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        else:
+            # self.checker.check_packages(("onnxruntime==1.18.1",))
+            providers = ["CPUExecutionProvider"]
+        import onnxruntime
 
-	def load_model(self, w):
-		# ONNXRuntime will attempt to use the first provider, and if it fails or is not
-		# available for some reason, it will fall back to the next provider in the list
-		if self.device.type == "mps":
-			# self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
-			providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
-		elif self.device.type == "cuda":
-			# self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
-			providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-		else:
-			# self.checker.check_packages(("onnxruntime==1.18.1",))
-			providers = ["CPUExecutionProvider"]
-		import onnxruntime
-		self.session = onnxruntime.InferenceSession(str(w), providers=providers)
+        self.session = onnxruntime.InferenceSession(
+            str(w), providers=providers
+        )
 
-	def forward(self, im_batch):
-		im_batch = im_batch.cpu().numpy()
-		features = self.session.run(
-			[self.session.get_outputs()[0].name],
-			{self.session.get_inputs()[0].name: im_batch},
-		)[0]
-		return features
+    def forward(self, im_batch):
+        """Feed ``im_batch`` to the ONNX session; return its first output."""
+        array = im_batch.cpu().numpy()  # the session is fed a numpy array
+        session = self.session
+        output_names = [session.get_outputs()[0].name]
+        feeds = {session.get_inputs()[0].name: array}
+        return session.run(output_names, feeds)[0]
diff --git a/ethology/reid/backends/openvino_backend.py b/ethology/reid/backends/openvino_backend.py
index f06392bf..0c56a06e 100644
--- a/ethology/reid/backends/openvino_backend.py
+++ b/ethology/reid/backends/openvino_backend.py
@@ -1,48 +1,49 @@
 from pathlib import Path
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+
 # Note: LOGGER can be replaced with print or a local logger if needed
 
+
 class OpenVinoBackend(BaseModelBackend):
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
+
+    def load_model(self, w):
+        # self.checker.check_packages(("openvino>=2025.2.0",))
+
+        print(f"Loading {w} for OpenVINO inference...")
+        try:
+            # requires the openvino package: https://pypi.org/project/openvino/
+            from openvino import Core, Layout
+        except ImportError:
+            print(
+                f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
+                "requires openvino pip package to be installed!\n"
+                "$ pip install openvino>=2025.2.0\n"
+            )
+            raise
+        ie = Core()
+        w = Path(w)
+        print(w)
+        if w.suffix == ".bin":
+            w = w.with_suffix(".xml")
+
+        if not w.is_file():  # if not *.xml
+            w = next(
+                Path(w).glob("*.xml")
+            )  # get *.xml file from *_openvino_model dir
+        network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
+        if network.get_parameters()[0].get_layout().empty:
+            network.get_parameters()[0].set_layout(Layout("NCWH"))
+        self.executable_network = ie.compile_model(
+            network, device_name="CPU"
+        )  # device_name="MYRIAD" for Intel NCS2
+        self.output_layer = next(iter(self.executable_network.outputs))
 
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
-
-	def load_model(self, w):
-		# self.checker.check_packages(("openvino>=2025.2.0",))
-
-		print(f"Loading {w} for OpenVINO inference...")
-		try:
-			# requires openvino-dev: https://pypi.org/project/openvino-dev/
-			from openvino import Core, Layout
-		except ImportError:
-			print(
-				f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
-				"requires openvino pip package to be installed!\n"
-				"$ pip install openvino>=2025.2.0\n"
-			)
-			raise
-		ie = Core()
-		w = Path(w)
-		print(w)
-		if w.suffix == '.bin':
-			w = w.with_suffix('.xml')
-
-		if not w.is_file():  # if not *.xml
-			w = next(
-				Path(w).glob("*.xml")
-			)  # get *.xml file from *_openvino_model dir
-		network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
-		if network.get_parameters()[0].get_layout().empty:
-			network.get_parameters()[0].set_layout(Layout("NCWH"))
-		self.executable_network = ie.compile_model(
-			network, device_name="CPU"
-		)  # device_name="MYRIAD" for Intel NCS2
-		self.output_layer = next(iter(self.executable_network.outputs))
-
-	def forward(self, im_batch):
-		im_batch = im_batch.cpu().numpy()  # FP32
-		features = self.executable_network([im_batch])[self.output_layer]
-		return features
+    def forward(self, im_batch):
+        im_batch = im_batch.cpu().numpy()  # FP32
+        features = self.executable_network([im_batch])[self.output_layer]
+        return features
diff --git a/ethology/reid/backends/pytorch_backend.py b/ethology/reid/backends/pytorch_backend.py
index d3dbfa06..2e859cc8 100644
--- a/ethology/reid/backends/pytorch_backend.py
+++ b/ethology/reid/backends/pytorch_backend.py
@@ -1,20 +1,20 @@
 from ethology.reid.backends.base_backend import BaseModelBackend
 from ethology.reid.core.registry import ReIDModelRegistry
 
-class PyTorchBackend(BaseModelBackend):
 
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
+class PyTorchBackend(BaseModelBackend):
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
 
-	def load_model(self, w):
-		# Load a PyTorch model
-		if w and w.is_file():
-			ReIDModelRegistry.load_pretrained_weights(self.model, w)
-		self.model.to(self.device).eval()
-		self.model.half() if self.half else self.model.float()
+    def load_model(self, w):
+        # Load a PyTorch model
+        if w and w.is_file():
+            ReIDModelRegistry.load_pretrained_weights(self.model, w)
+        self.model.to(self.device).eval()
+        self.model.half() if self.half else self.model.float()
 
-	def forward(self, im_batch):
-		features = self.model(im_batch)
-		return features
+    def forward(self, im_batch):
+        features = self.model(im_batch)
+        return features
diff --git a/ethology/reid/backends/tensorrt_backend.py b/ethology/reid/backends/tensorrt_backend.py
index 8dd7d7ee..4f6e95b0 100644
--- a/ethology/reid/backends/tensorrt_backend.py
+++ b/ethology/reid/backends/tensorrt_backend.py
@@ -1,310 +1,400 @@
+# Note: LOGGER can be replaced with print or a local logger if needed
+import os
 from collections import OrderedDict, namedtuple
 
 import numpy as np
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
-# Note: LOGGER can be replaced with print or a local logger if needed
-
-import os
-import sys
-import torch
-import numpy as np
-from collections import namedtuple, OrderedDict
-
-
 
 Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
 
 
 class TensorRTBackend(BaseModelBackend):
-	def __init__(self, engine_path, device=None):
-		import hashlib
-		import requests
-		self.device = device or (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
-		self.fp16 = False
-		self.model_ = None
-		self.context = None
-		self.bindings = None
-		self.binding_addrs = None
-		self.is_trt10 = False
-		# Download engine if engine_path is a URL
-		if engine_path.startswith("http://") or engine_path.startswith("https://"):
-			# Use a hash of the URL for filename
-			engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
-			filename = f"trt_engine_{engine_hash}.engine"
-			cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
-			os.makedirs(cache_dir, exist_ok=True)
-			cached_file = os.path.join(cache_dir, filename)
-			if not os.path.exists(cached_file):
-				print(f"[TensorRT] Downloading engine from {engine_path} to {cached_file}")
-				with requests.get(engine_path, stream=True) as r:
-					r.raise_for_status()
-					with open(cached_file, 'wb') as f:
-						for chunk in r.iter_content(chunk_size=8192):
-							f.write(chunk)
-			else:
-				print(f"[TensorRT] Using cached engine at {cached_file}")
-			self.engine_path = cached_file
-		else:
-			self.engine_path = engine_path
-		self.load_model(self.engine_path)
-
-	def load_model(self, w):
-		print(f"Loading {w} for TensorRT inference...")
-		try:
-			import tensorrt as trt
-			import pycuda.driver as cuda
-			import pycuda.autoinit  # noqa: F401
-		except ImportError:
-			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH.")
-
-		if self.device.type == "cpu":
-			if torch.cuda.is_available():
-				self.device = torch.device("cuda:0")
-			else:
-				raise ValueError("CUDA device not available for TensorRT inference.")
-
-		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
-		logger = trt.Logger(trt.Logger.INFO)
-
-		# Deserialize the engine
-		with open(w, "rb") as f:
-			with trt.Runtime(logger) as runtime:
-				self.model_ = runtime.deserialize_cuda_engine(f.read())
-
-		# Execution context
-		self.context = self.model_.create_execution_context()
-		self.bindings = OrderedDict()
-
-		self.is_trt10 = not hasattr(self.model_, "num_bindings")
-		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
-
-		# Parse bindings
-		for index in num:
-			if self.is_trt10:
-				name = self.model_.get_tensor_name(index)
-				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
-					self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_tensor_shape(name))
-
-			else:
-				name = self.model_.get_binding_name(index)
-				dtype = trt.nptype(self.model_.get_binding_dtype(index))
-				is_input = self.model_.binding_is_input(index)
-
-				# Handle dynamic shapes
-				if is_input and -1 in self.model_.get_binding_shape(index):
-					profile_index = 0
-					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
-					self.context.set_binding_shape(index, opt_shape)
-
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_binding_shape(index))
-			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
-			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-
-		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
-
-		# Execution context
-		self.context = self.model_.create_execution_context()
-		self.bindings = OrderedDict()
-
-		self.is_trt10 = not hasattr(self.model_, "num_bindings")
-		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
-
-		# Parse bindings
-		for index in num:
-			if self.is_trt10:
-				name = self.model_.get_tensor_name(index)
-				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
-						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_tensor_shape(name))
-
-			else:
-				name = self.model_.get_binding_name(index)
-				dtype = trt.nptype(self.model_.get_binding_dtype(index))
-				is_input = self.model_.binding_is_input(index)
-
-				# Handle dynamic shapes
-				if is_input and -1 in self.model_.get_binding_shape(index):
-					profile_index = 0
-					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
-					self.context.set_binding_shape(index, opt_shape)
-
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_binding_shape(index))
-			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
-			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-
-		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
-
-	def forward(self, im_batch):
-		temp_im_batch = im_batch.clone()
-		batch_array = []
-		inp_batch = im_batch.shape[0]
-		out_batch = self.bindings["output"].shape[0]
-		resultant_features = []
-
-		# Divide batch to sub batches
-		while inp_batch > out_batch:
-			batch_array.append(temp_im_batch[:out_batch])
-			temp_im_batch = temp_im_batch[out_batch:]
-			inp_batch = temp_im_batch.shape[0]
-		if temp_im_batch.shape[0] > 0:
-			batch_array.append(temp_im_batch)
-
-		for temp_batch in batch_array:
-			# Adjust for dynamic shapes
-			if temp_batch.shape != self.bindings["images"].shape:
-				if self.is_trt10:
-					self.context.set_input_shape("images", temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
-				else:
-					i_in = self.model_.get_binding_index("images")
-					i_out = self.model_.get_binding_index("output")
-					self.context.set_binding_shape(i_in, temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					output_shape = tuple(self.context.get_binding_shape(i_out))
-					self.bindings["output"].data.resize_(output_shape)
-
-			s = self.bindings["images"].shape
-			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
-
-			self.binding_addrs["images"] = int(temp_batch.data_ptr())
-
-			# Execute inference
-			self.context.execute_v2(list(self.binding_addrs.values()))
-			features = self.bindings["output"].data
-			resultant_features.append(features.clone())
-
-		if len(resultant_features) == 1:
-			return resultant_features[0]
-		else:
-			rslt_features = torch.cat(resultant_features, dim=0)
-			rslt_features = rslt_features[: im_batch.shape[0]]
-			return rslt_features
-
-	def load_model(self, w):
-		print(f"Loading {w} for TensorRT inference...")
-		# self.checker.check_packages(("nvidia-tensorrt",))
-		try:
-			import tensorrt as trt  # TensorRT library
-		except ImportError:
-			raise ImportError("Please install tensorrt to use this backend.")
-
-		if self.device.type == "cpu":
-			if torch.cuda.is_available():
-				self.device = torch.device("cuda:0")
-			else:
-				raise ValueError("CUDA device not available for TensorRT inference.")
-
-		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
-		logger = trt.Logger(trt.Logger.INFO)
-
-		# Deserialize the engine
-		with open(w, "rb") as f:
-			with trt.Runtime(logger) as runtime:
-				self.model_ = runtime.deserialize_cuda_engine(f.read())
-
-		# Execution context
-		self.context = self.model_.create_execution_context()
-		self.bindings = OrderedDict()
-
-		self.is_trt10 = not hasattr(self.model_, "num_bindings")
-		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
-
-		# Parse bindings
-		for index in num:
-			if self.is_trt10:
-				name = self.model_.get_tensor_name(index)
-				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
-						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_tensor_shape(name))
-
-			else:
-				name = self.model_.get_binding_name(index)
-				dtype = trt.nptype(self.model_.get_binding_dtype(index))
-				is_input = self.model_.binding_is_input(index)
-
-				# Handle dynamic shapes
-				if is_input and -1 in self.model_.get_binding_shape(index):
-					profile_index = 0
-					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
-					self.context.set_binding_shape(index, opt_shape)
-
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_binding_shape(index))
-			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
-			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-
-		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
-
-	def forward(self, im_batch):
-		temp_im_batch = im_batch.clone()
-		batch_array = []
-		inp_batch = im_batch.shape[0]
-		out_batch = self.bindings["output"].shape[0]
-		resultant_features = []
-
-		# Divide batch to sub batches
-		while inp_batch > out_batch:
-			batch_array.append(temp_im_batch[:out_batch])
-			temp_im_batch = temp_im_batch[out_batch:]
-			inp_batch = temp_im_batch.shape[0]
-		if temp_im_batch.shape[0] > 0:
-			batch_array.append(temp_im_batch)
-
-		for temp_batch in batch_array:
-			# Adjust for dynamic shapes
-			if temp_batch.shape != self.bindings["images"].shape:
-				if self.is_trt10:
-					self.context.set_input_shape("images", temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
-				else:
-					i_in = self.model_.get_binding_index("images")
-					i_out = self.model_.get_binding_index("output")
-					self.context.set_binding_shape(i_in, temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					output_shape = tuple(self.context.get_binding_shape(i_out))
-					self.bindings["output"].data.resize_(output_shape)
-
-			s = self.bindings["images"].shape
-			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
-
-			self.binding_addrs["images"] = int(temp_batch.data_ptr())
-
-			# Execute inference
-			self.context.execute_v2(list(self.binding_addrs.values()))
-			features = self.bindings["output"].data
-			resultant_features.append(features.clone())
-
-		if len(resultant_features) == 1:
-			return resultant_features[0]
-		else:
-			rslt_features = torch.cat(resultant_features, dim=0)
-			rslt_features = rslt_features[: im_batch.shape[0]]
-			return rslt_features
+    def __init__(self, engine_path, device=None):
+        import hashlib
+
+        import requests
+
+        self.device = device or (
+            torch.device("cuda")
+            if torch.cuda.is_available()
+            else torch.device("cpu")
+        )
+        self.fp16 = False
+        self.model_ = None
+        self.context = None
+        self.bindings = None
+        self.binding_addrs = None
+        self.is_trt10 = False
+        # Download engine if engine_path is a URL
+        if engine_path.startswith("http://") or engine_path.startswith(
+            "https://"
+        ):
+            # Use a hash of the URL for filename
+            engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
+            filename = f"trt_engine_{engine_hash}.engine"
+            cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
+            os.makedirs(cache_dir, exist_ok=True)
+            cached_file = os.path.join(cache_dir, filename)
+            if not os.path.exists(cached_file):
+                print(
+                    f"[TensorRT] Downloading engine from {engine_path} to {cached_file}"
+                )
+                with requests.get(engine_path, stream=True) as r:
+                    r.raise_for_status()
+                    with open(cached_file, "wb") as f:
+                        for chunk in r.iter_content(chunk_size=8192):
+                            f.write(chunk)
+            else:
+                print(f"[TensorRT] Using cached engine at {cached_file}")
+            self.engine_path = cached_file
+        else:
+            self.engine_path = engine_path
+        self.load_model(self.engine_path)
+
+    def load_model(self, w):
+        print(f"Loading {w} for TensorRT inference...")
+        try:
+            import pycuda.autoinit  # noqa: F401
+            import pycuda.driver as cuda
+            import tensorrt as trt
+        except ImportError:
+            raise ImportError(
+                "TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH."
+            )
+
+        if self.device.type == "cpu":
+            if torch.cuda.is_available():
+                self.device = torch.device("cuda:0")
+            else:
+                raise ValueError(
+                    "CUDA device not available for TensorRT inference."
+                )
+
+        Binding = namedtuple(
+            "Binding", ("name", "dtype", "shape", "data", "ptr")
+        )
+        logger = trt.Logger(trt.Logger.INFO)
+
+        # Deserialize the engine
+        with open(w, "rb") as f, trt.Runtime(logger) as runtime:
+            self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+        # Execution context
+        self.context = self.model_.create_execution_context()
+        self.bindings = OrderedDict()
+
+        self.is_trt10 = not hasattr(self.model_, "num_bindings")
+        num = (
+            range(self.model_.num_io_tensors)
+            if self.is_trt10
+            else range(self.model_.num_bindings)
+        )
+
+        # Parse bindings
+        for index in num:
+            if self.is_trt10:
+                name = self.model_.get_tensor_name(index)
+                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+                is_input = (
+                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+                )
+                if is_input and -1 in tuple(
+                    self.model_.get_tensor_shape(name)
+                ):
+                    self.context.set_input_shape(
+                        name,
+                        tuple(
+                            self.model_.get_tensor_profile_shape(name, 0)[1]
+                        ),
+                    )
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_tensor_shape(name))
+
+            else:
+                name = self.model_.get_binding_name(index)
+                dtype = trt.nptype(self.model_.get_binding_dtype(index))
+                is_input = self.model_.binding_is_input(index)
+
+                # Handle dynamic shapes
+                if is_input and -1 in self.model_.get_binding_shape(index):
+                    profile_index = 0
+                    min_shape, opt_shape, max_shape = (
+                        self.model_.get_profile_shape(profile_index, index)
+                    )
+                    self.context.set_binding_shape(index, opt_shape)
+
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_binding_shape(index))
+            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
+                self.device
+            )
+            self.bindings[name] = Binding(
+                name, dtype, shape, data, int(data.data_ptr())
+            )
+
+        self.binding_addrs = OrderedDict(
+            (n, d.ptr) for n, d in self.bindings.items()
+        )
+
+        # NOTE(review): repeats the setup above, discarding the first pass
+        self.context = self.model_.create_execution_context()
+        self.bindings = OrderedDict()
+
+        self.is_trt10 = not hasattr(self.model_, "num_bindings")
+        num = (
+            range(self.model_.num_io_tensors)
+            if self.is_trt10
+            else range(self.model_.num_bindings)
+        )
+
+        # Parse bindings
+        for index in num:
+            if self.is_trt10:
+                name = self.model_.get_tensor_name(index)
+                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+                is_input = (
+                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+                )
+                if is_input and -1 in tuple(
+                    self.model_.get_tensor_shape(name)
+                ):
+                    self.context.set_input_shape(
+                        name,
+                        tuple(
+                            self.model_.get_tensor_profile_shape(name, 0)[1]
+                        ),
+                    )
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_tensor_shape(name))
+
+            else:
+                name = self.model_.get_binding_name(index)
+                dtype = trt.nptype(self.model_.get_binding_dtype(index))
+                is_input = self.model_.binding_is_input(index)
+
+                # Handle dynamic shapes
+                if is_input and -1 in self.model_.get_binding_shape(index):
+                    profile_index = 0
+                    min_shape, opt_shape, max_shape = (
+                        self.model_.get_profile_shape(profile_index, index)
+                    )
+                    self.context.set_binding_shape(index, opt_shape)
+
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_binding_shape(index))
+            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
+                self.device
+            )
+            self.bindings[name] = Binding(
+                name, dtype, shape, data, int(data.data_ptr())
+            )
+
+        self.binding_addrs = OrderedDict(
+            (n, d.ptr) for n, d in self.bindings.items()
+        )
+
+    def forward(self, im_batch):
+        temp_im_batch = im_batch.clone()
+        batch_array = []
+        inp_batch = im_batch.shape[0]
+        out_batch = self.bindings["output"].shape[0]
+        resultant_features = []
+
+        # Split the batch into sub-batches of at most out_batch items
+        while inp_batch > out_batch:
+            batch_array.append(temp_im_batch[:out_batch])
+            temp_im_batch = temp_im_batch[out_batch:]
+            inp_batch = temp_im_batch.shape[0]
+        if temp_im_batch.shape[0] > 0:
+            batch_array.append(temp_im_batch)
+
+        for temp_batch in batch_array:
+            # Adjust for dynamic shapes
+            if temp_batch.shape != self.bindings["images"].shape:
+                if self.is_trt10:
+                    self.context.set_input_shape("images", temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    self.bindings["output"].data.resize_(
+                        tuple(self.context.get_tensor_shape("output"))
+                    )
+                else:
+                    i_in = self.model_.get_binding_index("images")
+                    i_out = self.model_.get_binding_index("output")
+                    self.context.set_binding_shape(i_in, temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    output_shape = tuple(self.context.get_binding_shape(i_out))
+                    self.bindings["output"].data.resize_(output_shape)
+
+            s = self.bindings["images"].shape
+            assert temp_batch.shape == s, (
+                f"Input size {temp_batch.shape} does not match model size {s}"
+            )
+
+            self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+            # Execute inference
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            features = self.bindings["output"].data
+            resultant_features.append(features.clone())
+
+        if len(resultant_features) == 1:
+            return resultant_features[0]
+        else:
+            rslt_features = torch.cat(resultant_features, dim=0)
+            rslt_features = rslt_features[: im_batch.shape[0]]
+            return rslt_features
+
+    def load_model(self, w):
+        print(f"Loading {w} for TensorRT inference...")
+        # self.checker.check_packages(("nvidia-tensorrt",))
+        try:
+            import tensorrt as trt  # TensorRT library
+        except ImportError:
+            raise ImportError("Please install tensorrt to use this backend.")
+
+        if self.device.type == "cpu":
+            if torch.cuda.is_available():
+                self.device = torch.device("cuda:0")
+            else:
+                raise ValueError(
+                    "CUDA device not available for TensorRT inference."
+                )
+
+        Binding = namedtuple(
+            "Binding", ("name", "dtype", "shape", "data", "ptr")
+        )
+        logger = trt.Logger(trt.Logger.INFO)
+
+        # Deserialize the engine
+        with open(w, "rb") as f, trt.Runtime(logger) as runtime:
+            self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+        # Execution context
+        self.context = self.model_.create_execution_context()
+        self.bindings = OrderedDict()
+
+        self.is_trt10 = not hasattr(self.model_, "num_bindings")
+        num = (
+            range(self.model_.num_io_tensors)
+            if self.is_trt10
+            else range(self.model_.num_bindings)
+        )
+
+        # Parse bindings
+        for index in num:
+            if self.is_trt10:
+                name = self.model_.get_tensor_name(index)
+                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+                is_input = (
+                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+                )
+                if is_input and -1 in tuple(
+                    self.model_.get_tensor_shape(name)
+                ):
+                    self.context.set_input_shape(
+                        name,
+                        tuple(
+                            self.model_.get_tensor_profile_shape(name, 0)[1]
+                        ),
+                    )
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_tensor_shape(name))
+
+            else:
+                name = self.model_.get_binding_name(index)
+                dtype = trt.nptype(self.model_.get_binding_dtype(index))
+                is_input = self.model_.binding_is_input(index)
+
+                # Handle dynamic shapes
+                if is_input and -1 in self.model_.get_binding_shape(index):
+                    profile_index = 0
+                    min_shape, opt_shape, max_shape = (
+                        self.model_.get_profile_shape(profile_index, index)
+                    )
+                    self.context.set_binding_shape(index, opt_shape)
+
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_binding_shape(index))
+            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
+                self.device
+            )
+            self.bindings[name] = Binding(
+                name, dtype, shape, data, int(data.data_ptr())
+            )
+
+        self.binding_addrs = OrderedDict(
+            (n, d.ptr) for n, d in self.bindings.items()
+        )
+
+    def forward(self, im_batch):
+        temp_im_batch = im_batch.clone()
+        batch_array = []
+        inp_batch = im_batch.shape[0]
+        out_batch = self.bindings["output"].shape[0]
+        resultant_features = []
+
+        # Split the batch into sub-batches of at most out_batch items
+        while inp_batch > out_batch:
+            batch_array.append(temp_im_batch[:out_batch])
+            temp_im_batch = temp_im_batch[out_batch:]
+            inp_batch = temp_im_batch.shape[0]
+        if temp_im_batch.shape[0] > 0:
+            batch_array.append(temp_im_batch)
+
+        for temp_batch in batch_array:
+            # Adjust for dynamic shapes
+            if temp_batch.shape != self.bindings["images"].shape:
+                if self.is_trt10:
+                    self.context.set_input_shape("images", temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    self.bindings["output"].data.resize_(
+                        tuple(self.context.get_tensor_shape("output"))
+                    )
+                else:
+                    i_in = self.model_.get_binding_index("images")
+                    i_out = self.model_.get_binding_index("output")
+                    self.context.set_binding_shape(i_in, temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    output_shape = tuple(self.context.get_binding_shape(i_out))
+                    self.bindings["output"].data.resize_(output_shape)
+
+            s = self.bindings["images"].shape
+            assert temp_batch.shape == s, (
+                f"Input size {temp_batch.shape} does not match model size {s}"
+            )
+
+            self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+            # Execute inference
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            features = self.bindings["output"].data
+            resultant_features.append(features.clone())
+
+        if len(resultant_features) == 1:
+            return resultant_features[0]
+        else:
+            rslt_features = torch.cat(resultant_features, dim=0)
+            rslt_features = rslt_features[: im_batch.shape[0]]
+            return rslt_features
diff --git a/ethology/reid/backends/tflite_backend.py b/ethology/reid/backends/tflite_backend.py
index b0a7b707..eb10d4e8 100644
--- a/ethology/reid/backends/tflite_backend.py
+++ b/ethology/reid/backends/tflite_backend.py
@@ -4,37 +4,39 @@
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+
 # Note: LOGGER can be replaced with print or a local logger if needed
 
+
 class TFLiteBackend(BaseModelBackend):
-	"""
-	A class to handle TensorFlow Lite model inference with dynamic batch size support.
-	"""
-	def __init__(self, weights: Path, device: str, half: bool):
-		super().__init__(weights, device, half)
-		self.nhwc = True
-		self.half = False
-
-	def load_model(self, w):
-		# self.checker.check_packages(("tensorflow",))
-		print(f"Loading {str(w)} for TensorFlow Lite inference...")
-		import tensorflow as tf
-		self.interpreter = tf.lite.Interpreter(model_path=str(w))
-		self.interpreter.allocate_tensors()
-		self.input_details = self.interpreter.get_input_details()
-		self.output_details = self.interpreter.get_output_details()
-		self.current_allocated_batch_size = self.input_details[0]["shape"][0]
-
-	def forward(self, im_batch: torch.Tensor) -> np.ndarray:
-		im_batch = im_batch.cpu().numpy()
-		batch_size = im_batch.shape[0]
-		if batch_size != self.current_allocated_batch_size:
-			self.interpreter.resize_tensor_input(
-				self.input_details[0]["index"], [batch_size, 256, 128, 3]
-			)
-			self.interpreter.allocate_tensors()
-			self.current_allocated_batch_size = batch_size
-		self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
-		self.interpreter.invoke()
-		features = self.interpreter.get_tensor(self.output_details[0]["index"])
-		return features
+    """A class to handle TensorFlow Lite model inference with dynamic batch size support."""
+
+    def __init__(self, weights: Path, device: str, half: bool):
+        super().__init__(weights, device, half)
+        self.nhwc = True
+        self.half = False
+
+    def load_model(self, w):
+        # self.checker.check_packages(("tensorflow",))
+        print(f"Loading {str(w)} for TensorFlow Lite inference...")
+        import tensorflow as tf
+
+        self.interpreter = tf.lite.Interpreter(model_path=str(w))
+        self.interpreter.allocate_tensors()
+        self.input_details = self.interpreter.get_input_details()
+        self.output_details = self.interpreter.get_output_details()
+        self.current_allocated_batch_size = self.input_details[0]["shape"][0]
+
+    def forward(self, im_batch: torch.Tensor) -> np.ndarray:
+        im_batch = im_batch.cpu().numpy()
+        batch_size = im_batch.shape[0]
+        if batch_size != self.current_allocated_batch_size:
+            self.interpreter.resize_tensor_input(
+                self.input_details[0]["index"], [batch_size, 256, 128, 3]
+            )
+            self.interpreter.allocate_tensors()
+            self.current_allocated_batch_size = batch_size
+        self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
+        self.interpreter.invoke()
+        features = self.interpreter.get_tensor(self.output_details[0]["index"])
+        return features
diff --git a/ethology/reid/backends/torchscript_backend.py b/ethology/reid/backends/torchscript_backend.py
index b6602171..1142fcc4 100644
--- a/ethology/reid/backends/torchscript_backend.py
+++ b/ethology/reid/backends/torchscript_backend.py
@@ -1,20 +1,21 @@
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+
 # Note: LOGGER can be replaced with print or a local logger if needed
 
-class TorchscriptBackend(BaseModelBackend):
 
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
+class TorchscriptBackend(BaseModelBackend):
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
 
-	def load_model(self, w):
-		print(f"Loading {w} for TorchScript inference...")
-		self.model = torch.jit.load(w)
-		self.model.half() if self.half else self.model.float()
+    def load_model(self, w):
+        print(f"Loading {w} for TorchScript inference...")
+        self.model = torch.jit.load(w)
+        self.model.half() if self.half else self.model.float()
 
-	def forward(self, im_batch):
-		features = self.model(im_batch)
-		return features
+    def forward(self, im_batch):
+        features = self.model(im_batch)
+        return features
diff --git a/ethology/reid/core/auto_backend.py b/ethology/reid/core/auto_backend.py
index 6f43eba2..22f2c4e2 100644
--- a/ethology/reid/core/auto_backend.py
+++ b/ethology/reid/core/auto_backend.py
@@ -1,74 +1,89 @@
-
 from pathlib import Path
-from typing import Tuple, Union
+
 import torch
+
 from ethology.reid.backends.onnx_backend import ONNXBackend
 from ethology.reid.backends.openvino_backend import OpenVinoBackend
 from ethology.reid.backends.pytorch_backend import PyTorchBackend
+
 try:
-	from ethology.reid.backends.tensorrt_backend import TensorRTBackend
+    from ethology.reid.backends.tensorrt_backend import TensorRTBackend
 except ImportError:
-	class TensorRTBackend:
-		def __init__(self, *args, **kwargs):
-			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH.")
+
+    class TensorRTBackend:
+        def __init__(self, *args, **kwargs):
+            raise ImportError(
+                "TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH."
+            )
+
+
 from ethology.reid.backends.tflite_backend import TFLiteBackend
 from ethology.reid.backends.torchscript_backend import TorchscriptBackend
+
 # from ethology.reid.core import export_formats  # If needed, implement or copy export_formats
 # from ethology.utils import WEIGHTS  # If needed, implement or set WEIGHTS
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 # from ethology.utils.torch_utils import select_device  # If needed, implement or set select_device
 
+
 class ReidAutoBackend:
-	def __init__(
-		self,
-		weights: Path,
-		device: torch.device = torch.device("cpu"),
-		half: bool = False,
-	):
-		super().__init__()
-		w = weights[0] if isinstance(weights, list) else weights
-		(
-			self.pt,
-			self.pth,
-			self.jit,
-			self.onnx,
-			self.xml,
-			self.engine,
-			self.tflite,
-		) = self.model_type(w)
-		self.weights = weights
-		self.device = device  # For simplicity, skip select_device for now
-		self.half = half
-		self.model = self.get_backend()
+    def __init__(
+        self,
+        weights: Path,
+        device: torch.device = torch.device("cpu"),
+        half: bool = False,
+    ):
+        super().__init__()
+        w = weights[0] if isinstance(weights, list) else weights
+        (
+            self.pt,
+            self.pth,
+            self.jit,
+            self.onnx,
+            self.xml,
+            self.engine,
+            self.tflite,
+        ) = self.model_type(w)
+        self.weights = weights
+        self.device = device  # For simplicity, skip select_device for now
+        self.half = half
+        self.model = self.get_backend()
 
-	def get_backend(self):
-		backend_map = {
-			self.pt or self.pth: PyTorchBackend,
-			self.jit: TorchscriptBackend,
-			self.onnx: ONNXBackend,
-			self.engine: TensorRTBackend,
-			self.xml: OpenVinoBackend,
-			self.tflite: TFLiteBackend,
-		}
-		for condition, backend_class in backend_map.items():
-			if condition:
-				return backend_class(self.weights, self.device, self.half)
-		raise RuntimeError("This model framework is not supported yet!")
+    def get_backend(self):
+        backend_map = {
+            self.pt or self.pth: PyTorchBackend,
+            self.jit: TorchscriptBackend,
+            self.onnx: ONNXBackend,
+            self.engine: TensorRTBackend,
+            self.xml: OpenVinoBackend,
+            self.tflite: TFLiteBackend,
+        }
+        for condition, backend_class in backend_map.items():
+            if condition:
+                return backend_class(self.weights, self.device, self.half)
+        raise RuntimeError("This model framework is not supported yet!")
 
-	def check_suffix(self, file: Path = "osnet_x0_25_msmt17.pt", suffix: Union[str, Tuple[str, ...]] = (".pt",), msg: str = ""):
-		suffix = [suffix] if isinstance(suffix, str) else list(suffix)
-		files = [file] if isinstance(file, (str, Path)) else list(file)
-		for f in files:
-			file_suffix = Path(f).suffix.lower()
-			if file_suffix and file_suffix not in suffix:
-				print(f"File {f} does not have an acceptable suffix. Expected: {suffix}")
+    def check_suffix(
+        self,
+        file: Path = "osnet_x0_25_msmt17.pt",
+        suffix: str | tuple[str, ...] = (".pt",),
+        msg: str = "",
+    ):
+        suffix = [suffix] if isinstance(suffix, str) else list(suffix)
+        files = [file] if isinstance(file, (str, Path)) else list(file)
+        for f in files:
+            file_suffix = Path(f).suffix.lower()
+            if file_suffix and file_suffix not in suffix:
+                print(
+                    f"File {f} does not have an acceptable suffix. Expected: {suffix}"
+                )
 
-	def model_type(self, p: Path) -> Tuple[bool, ...]:
-		# For demo, just check for .pt
-		sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
-		self.check_suffix(p, sf)
-		types = [str(Path(p)).endswith(s) for s in sf]
-		# OpenVINO explicit check
-		if Path(p).suffix in ['.xml', '.bin']:
-			types[3] = True
-		return tuple(types)
+    def model_type(self, p: Path) -> tuple[bool, ...]:
+        # Return one flag per suffix, in `sf` order (unpacked by __init__).
+        sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
+        self.check_suffix(p, sf)
+        types = [str(Path(p)).endswith(s) for s in sf]
+        # OpenVINO explicit check: set the xml flag (index 3 is onnx).
+        if Path(p).suffix in [".xml", ".bin"]:
+            types[sf.index(".xml")] = True
+        return tuple(types)
diff --git a/ethology/reid/core/config.py b/ethology/reid/core/config.py
index 926c0cc9..dc17cc14 100644
--- a/ethology/reid/core/config.py
+++ b/ethology/reid/core/config.py
@@ -1,16 +1,16 @@
 MODEL_TYPES = [
-	"resnet50",
-	"resnet101",
-	"mlfn",
-	"hacnn",
-	"mobilenetv2_x1_0",
-	"mobilenetv2_x1_4",
-	"osnet_x1_0",
-	"osnet_x0_75",
-	"osnet_x0_5",
-	"osnet_x0_25",
-	"osnet_ibn_x1_0",
-	"osnet_ain_x1_0",
-	"lmbn_n",
-	"clip",
+    "resnet50",
+    "resnet101",
+    "mlfn",
+    "hacnn",
+    "mobilenetv2_x1_0",
+    "mobilenetv2_x1_4",
+    "osnet_x1_0",
+    "osnet_x0_75",
+    "osnet_x0_5",
+    "osnet_x0_25",
+    "osnet_ibn_x1_0",
+    "osnet_ain_x1_0",
+    "lmbn_n",
+    "clip",
 ]
diff --git a/ethology/reid/core/factory.py b/ethology/reid/core/factory.py
index bc8b6ab1..27406383 100644
--- a/ethology/reid/core/factory.py
+++ b/ethology/reid/core/factory.py
@@ -1,30 +1,44 @@
-
 # Import model constructors from ethology's local backbones
 from ethology.reid.backbones.hacnn import HACNN
 from ethology.reid.backbones.mlfn import mlfn
-from ethology.reid.backbones.mobilenetv2 import mobilenetv2_x1_0, mobilenetv2_x1_4
-from ethology.reid.backbones.osnet import osnet_ibn_x1_0, osnet_x0_5, osnet_x0_25, osnet_x0_75, osnet_x1_0
-from ethology.reid.backbones.osnet_ain import osnet_ain_x0_5, osnet_ain_x0_25, osnet_ain_x0_75, osnet_ain_x1_0
+from ethology.reid.backbones.mobilenetv2 import (
+    mobilenetv2_x1_0,
+    mobilenetv2_x1_4,
+)
+from ethology.reid.backbones.osnet import (
+    osnet_ibn_x1_0,
+    osnet_x0_5,
+    osnet_x0_25,
+    osnet_x0_75,
+    osnet_x1_0,
+)
+from ethology.reid.backbones.osnet_ain import (
+    osnet_ain_x0_5,
+    osnet_ain_x0_25,
+    osnet_ain_x0_75,
+    osnet_ain_x1_0,
+)
 from ethology.reid.backbones.resnet import resnet50, resnet101
+
 # from ethology.reid.backbones.lmbn.lmbn_n import LMBN_n  # If present
 # from ethology.reid.backbones.clip.make_model import make_model  # If present
 
 MODEL_FACTORY = {
-	"resnet50": resnet50,
-	"resnet101": resnet101,
-	"mobilenetv2_x1_0": mobilenetv2_x1_0,
-	"mobilenetv2_x1_4": mobilenetv2_x1_4,
-	"hacnn": HACNN,
-	"mlfn": mlfn,
-	"osnet_x1_0": osnet_x1_0,
-	"osnet_x0_75": osnet_x0_75,
-	"osnet_x0_5": osnet_x0_5,
-	"osnet_x0_25": osnet_x0_25,
-	"osnet_ibn_x1_0": osnet_ibn_x1_0,
-	"osnet_ain_x1_0": osnet_ain_x1_0,
-	"osnet_ain_x0_75": osnet_ain_x0_75,
-	"osnet_ain_x0_5": osnet_ain_x0_5,
-	"osnet_ain_x0_25": osnet_ain_x0_25,
-	# "lmbn_n": LMBN_n,  # Uncomment if implemented
-	# "clip": make_model,  # Uncomment if implemented
+    "resnet50": resnet50,
+    "resnet101": resnet101,
+    "mobilenetv2_x1_0": mobilenetv2_x1_0,
+    "mobilenetv2_x1_4": mobilenetv2_x1_4,
+    "hacnn": HACNN,
+    "mlfn": mlfn,
+    "osnet_x1_0": osnet_x1_0,
+    "osnet_x0_75": osnet_x0_75,
+    "osnet_x0_5": osnet_x0_5,
+    "osnet_x0_25": osnet_x0_25,
+    "osnet_ibn_x1_0": osnet_ibn_x1_0,
+    "osnet_ain_x1_0": osnet_ain_x1_0,
+    "osnet_ain_x0_75": osnet_ain_x0_75,
+    "osnet_ain_x0_5": osnet_ain_x0_5,
+    "osnet_ain_x0_25": osnet_ain_x0_25,
+    # "lmbn_n": LMBN_n,  # Uncomment if implemented
+    # "clip": make_model,  # Uncomment if implemented
 }
diff --git a/ethology/reid/core/handler.py b/ethology/reid/core/handler.py
index b5e51391..ba521ab2 100644
--- a/ethology/reid/core/handler.py
+++ b/ethology/reid/core/handler.py
@@ -2,32 +2,35 @@
 
 # Thin wrapper to use BoxMOT ReID models in ethology
 from pathlib import Path
-from typing import Union
-import numpy as np
 
+import numpy as np
 
 # Import ethology's local ReID handler
 from ethology.reid.core.reid_handler import ReID as EthologyReID
 
+
 class ReIDHandler:
-    """
-    Ethology ReID handler using local models and backends.
-    """
-    def __init__(self, weights: Union[str, Path], device='cpu', half=False):
+    """Ethology ReID handler using local models and backends."""
+
+    def __init__(self, weights: str | Path, device="cpu", half=False):
         self.model = EthologyReID(weights=weights, device=device, half=half)
 
-    def extract_features(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
-        """
-        Extract feature embeddings for detections in a frame.
+    def extract_features(
+        self, frame: np.ndarray, dets: np.ndarray
+    ) -> np.ndarray:
+        """Extract feature embeddings for detections in a frame.
+
         Parameters
         ----------
         frame : np.ndarray
             (H, W, C) BGR image.
         dets : np.ndarray
             (N, 6) array of detections (x1, y1, x2, y2, conf, cls).
+
         Returns
         -------
         np.ndarray
             (N, D) feature embeddings.
+
         """
         return self.model(frame, dets)
diff --git a/ethology/reid/core/registry.py b/ethology/reid/core/registry.py
index 333cff2f..4b9c27fd 100644
--- a/ethology/reid/core/registry.py
+++ b/ethology/reid/core/registry.py
@@ -1,71 +1,88 @@
-
 from collections import OrderedDict
+
 import torch
-from ethology.reid.core.config import MODEL_TYPES  #, NR_CLASSES_DICT, TRAINED_URLS
+
+from ethology.reid.core.config import (
+    MODEL_TYPES,  # , NR_CLASSES_DICT, TRAINED_URLS
+)
 from ethology.reid.core.factory import MODEL_FACTORY
+
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 
+
 class ReIDModelRegistry:
-	"""Encapsulates model registration and related utilities."""
+    """Encapsulates model registration and related utilities."""
 
-	@staticmethod
-	def show_downloadable_models():
-		# LOGGER.info("Available .pt ReID models for automatic download")
-		# LOGGER.info(list(TRAINED_URLS.keys()))
-		pass
+    @staticmethod
+    def show_downloadable_models():
+        # LOGGER.info("Available .pt ReID models for automatic download")
+        # LOGGER.info(list(TRAINED_URLS.keys()))
+        pass
 
-	@staticmethod
-	def get_model_name(model):
-		for name in MODEL_TYPES:
-			if name in model.name:
-				return name
-		return None
+    @staticmethod
+    def get_model_name(model):
+        for name in MODEL_TYPES:
+            if name in model.name:
+                return name
+        return None
 
-	@staticmethod
-	def get_model_url(model):
-		# return TRAINED_URLS.get(model.name, None)
-		return None
+    @staticmethod
+    def get_model_url(model):
+        # return TRAINED_URLS.get(model.name, None)
+        return None
 
-	@staticmethod
-	def load_pretrained_weights(model, weight_path):
-		device = "cpu" if not torch.cuda.is_available() else None
-		checkpoint = torch.load(
-			weight_path,
-			map_location=torch.device("cpu") if device == "cpu" else None,
-			weights_only=False,
-			encoding='latin1',
-		)
-		state_dict = checkpoint.get("state_dict", checkpoint)
-		model_dict = model.state_dict()
-		new_state_dict = OrderedDict()
-		matched_layers, discarded_layers = [], []
-		for k, v in state_dict.items():
-			key = k[7:] if k.startswith("module.") else k
-			if key in model_dict and model_dict[key].size() == v.size():
-				new_state_dict[key] = v
-				matched_layers.append(key)
-			else:
-				discarded_layers.append(key)
-		model_dict.update(new_state_dict)
-		model.load_state_dict(model_dict)
+    @staticmethod
+    def load_pretrained_weights(model, weight_path):
+        device = "cpu" if not torch.cuda.is_available() else None
+        checkpoint = torch.load(
+            weight_path,
+            map_location=torch.device("cpu") if device == "cpu" else None,
+            weights_only=False,
+            encoding="latin1",
+        )
+        state_dict = checkpoint.get("state_dict", checkpoint)
+        model_dict = model.state_dict()
+        new_state_dict = OrderedDict()
+        matched_layers, discarded_layers = [], []
+        for k, v in state_dict.items():
+            key = k[7:] if k.startswith("module.") else k
+            if key in model_dict and model_dict[key].size() == v.size():
+                new_state_dict[key] = v
+                matched_layers.append(key)
+            else:
+                discarded_layers.append(key)
+        model_dict.update(new_state_dict)
+        model.load_state_dict(model_dict)
 
-	@staticmethod
-	def show_available_models():
-		# LOGGER.info("Available models:")
-		# LOGGER.info(list(MODEL_FACTORY.keys()))
-		pass
+    @staticmethod
+    def show_available_models():
+        # LOGGER.info("Available models:")
+        # LOGGER.info(list(MODEL_FACTORY.keys()))
+        pass
 
-	@staticmethod
-	def get_nr_classes(weights):
-		# dataset_key = weights.name.split("_")[1]
-		# return NR_CLASSES_DICT.get(dataset_key, 1)
-		return 1
+    @staticmethod
+    def get_nr_classes(weights):
+        # dataset_key = weights.name.split("_")[1]
+        # return NR_CLASSES_DICT.get(dataset_key, 1)
+        return 1
 
-	@staticmethod
-	def build_model(name, weights, num_classes, loss="softmax", pretrained=True, use_gpu=True):
-		if name not in MODEL_FACTORY:
-			available = list(MODEL_FACTORY.keys())
-			raise KeyError(f"Unknown model '{name}'. Must be one of {available}")
-		return MODEL_FACTORY[name](
-			num_classes=num_classes, loss=loss, pretrained=pretrained, use_gpu=use_gpu
-		)
+    @staticmethod
+    def build_model(
+        name,
+        weights,
+        num_classes,
+        loss="softmax",
+        pretrained=True,
+        use_gpu=True,
+    ):
+        if name not in MODEL_FACTORY:
+            available = list(MODEL_FACTORY.keys())
+            raise KeyError(
+                f"Unknown model '{name}'. Must be one of {available}"
+            )
+        return MODEL_FACTORY[name](
+            num_classes=num_classes,
+            loss=loss,
+            pretrained=pretrained,
+            use_gpu=use_gpu,
+        )
diff --git a/ethology/reid/core/reid_handler.py b/ethology/reid/core/reid_handler.py
index 2c72658a..62d42209 100644
--- a/ethology/reid/core/reid_handler.py
+++ b/ethology/reid/core/reid_handler.py
@@ -1,28 +1,33 @@
-
 from pathlib import Path
-from typing import Union
+
 import numpy as np
+
 from ethology.reid.core.auto_backend import ReidAutoBackend
 
+
 class ReID:
-	def __init__(self, weights: Union[str, Path], device='cpu', half=False):
-		self.weights = Path(weights)
-		self.device = device
-		self.half = half
-		self.backend = ReidAutoBackend(weights=self.weights, device=device, half=half)
-		self.model = self.backend.model
-
-	def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
-		"""
-		Extract features for detections in a frame.
-		Args:
-			frame: (H, W, C) BGR image
-			dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
-		Returns:
-			embs: (N, D) embeddings.
-		"""
-		if dets.shape[0] == 0:
-			return np.empty((0, 0))
-		xyxy = dets[:, :4]
-		embs = self.model.get_features(xyxy, frame)
-		return embs
+    def __init__(self, weights: str | Path, device="cpu", half=False):
+        self.weights = Path(weights)
+        self.device = device
+        self.half = half
+        self.backend = ReidAutoBackend(
+            weights=self.weights, device=device, half=half
+        )
+        self.model = self.backend.model
+
+    def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
+        """Extract features for detections in a frame.
+
+        Args:
+                frame: (H, W, C) BGR image
+                dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
+
+        Returns:
+                embs: (N, D) embeddings.
+
+        """
+        if dets.shape[0] == 0:
+            return np.empty((0, 0))
+        xyxy = dets[:, :4]
+        embs = self.model.get_features(xyxy, frame)
+        return embs
diff --git a/tests/test_unit/test_reid_handler.py b/tests/test_unit/test_reid_handler.py
index 3a5146cf..bc8a199c 100644
--- a/tests/test_unit/test_reid_handler.py
+++ b/tests/test_unit/test_reid_handler.py
@@ -1,12 +1,16 @@
 import numpy as np
+
 from ethology.reid.core.handler import ReIDHandler
 
+
 def test_extract_features_shape():
-    handler = ReIDHandler(weights='osnet_x0_25_imagenet.pth')
+    handler = ReIDHandler(weights="osnet_x0_25_imagenet.pth")
     frame = np.random.randint(0, 255, (128, 64, 3), dtype=np.uint8)
-    dets = np.array([
-        [10, 10, 50, 100, 0.9, 1],
-        [60, 20, 100, 110, 0.8, 2],
-    ])
+    dets = np.array(
+        [
+            [10, 10, 50, 100, 0.9, 1],
+            [60, 20, 100, 110, 0.8, 2],
+        ]
+    )
     feats = handler.extract_features(frame, dets)
     assert feats.shape[0] == dets.shape[0]

From 045b9df95bf8a5987e188d2acddeefd32ccfd1a4 Mon Sep 17 00:00:00 2001
From: AnandMayank <anandmayank698@gmail.com>
Date: Wed, 18 Feb 2026 19:21:29 +0530
Subject: [PATCH 07/12] style(reid): fix ruff errors in hacnn.py and mlfn.py

- Add missing docstrings
- Use super() instead of super(Class, self)
- Avoid mutable default arguments
- Fix long lines and other ruff issues

---
 ethology/reid/backbones/hacnn.py              | 578 ++++++------
 ethology/reid/backbones/mlfn.py               |   8 +-
 ethology/reid/backbones/mobilenetv2.py        | 456 +++++-----
 ethology/reid/backbones/osnet.py              | 800 +++++++----------
 ethology/reid/backbones/osnet_ain.py          | 831 +++++++-----------
 ethology/reid/backbones/resnet.py             | 271 ++++++
 ethology/reid/backends/base_backend.py        |  48 +-
 ethology/reid/backends/onnx_backend.py        |  55 +-
 ethology/reid/backends/openvino_backend.py    |  83 +-
 ethology/reid/backends/pytorch_backend.py     |  28 +-
 ethology/reid/backends/tensorrt_backend.py    | 688 +++++++--------
 ethology/reid/backends/tflite_backend.py      |  64 +-
 ethology/reid/backends/torchscript_backend.py |  25 +-
 ethology/reid/core/auto_backend.py            | 127 ++-
 ethology/reid/core/config.py                  |  28 +-
 ethology/reid/core/factory.py                 |  56 +-
 ethology/reid/core/handler.py                 |  21 +-
 ethology/reid/core/registry.py                | 133 ++-
 ethology/reid/core/reid_handler.py            |  51 +-
 tests/test_unit/test_reid_handler.py          |  14 +-
 20 files changed, 2024 insertions(+), 2341 deletions(-)

diff --git a/ethology/reid/backbones/hacnn.py b/ethology/reid/backbones/hacnn.py
index 9394ad30..f3a65746 100644
--- a/ethology/reid/backbones/hacnn.py
+++ b/ethology/reid/backbones/hacnn.py
@@ -1,5 +1,6 @@
-# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+"""HACNN backbone for person re-identification."""
 
+from __future__ import absolute_import, division
 
 import torch
 from torch import nn
@@ -9,315 +10,298 @@
 
 
 class ConvBlock(nn.Module):
-    def __init__(self, in_c, out_c, k, s=1, p=0):
-        super(ConvBlock, self).__init__()
-        self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
-        self.bn = nn.BatchNorm2d(out_c)
-
-    def forward(self, x):
-        return F.relu(self.bn(self.conv(x)))
-
+	def __init__(self, in_c, out_c, k, s=1, p=0):
+		"""Convolutional block with batch norm and ReLU."""
+		super().__init__()
+		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
+		self.bn = nn.BatchNorm2d(out_c)
+	def forward(self, x):
+		return F.relu(self.bn(self.conv(x)))
 
 class InceptionA(nn.Module):
-    def __init__(self, in_channels, out_channels):
-        super(InceptionA, self).__init__()
-        mid_channels = out_channels // 4
-        self.stream1 = nn.Sequential(
-            ConvBlock(in_channels, mid_channels, 1),
-            ConvBlock(mid_channels, mid_channels, 3, p=1),
-        )
-        self.stream2 = nn.Sequential(
-            ConvBlock(in_channels, mid_channels, 1),
-            ConvBlock(mid_channels, mid_channels, 3, p=1),
-        )
-        self.stream3 = nn.Sequential(
-            ConvBlock(in_channels, mid_channels, 1),
-            ConvBlock(mid_channels, mid_channels, 3, p=1),
-        )
-        self.stream4 = nn.Sequential(
-            nn.AvgPool2d(3, stride=1, padding=1),
-            ConvBlock(in_channels, mid_channels, 1),
-        )
-
-    def forward(self, x):
-        s1 = self.stream1(x)
-        s2 = self.stream2(x)
-        s3 = self.stream3(x)
-        s4 = self.stream4(x)
-        y = torch.cat([s1, s2, s3, s4], dim=1)
-        return y
-
+	def __init__(self, in_channels, out_channels):
+		"""InceptionA block."""
+		super().__init__()
+		mid_channels = out_channels // 4
+		self.stream1 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+		)
+		self.stream2 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+		)
+		self.stream3 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+		)
+		self.stream4 = nn.Sequential(
+			nn.AvgPool2d(3, stride=1, padding=1),
+			ConvBlock(in_channels, mid_channels, 1),
+		)
+	def forward(self, x):
+		s1 = self.stream1(x)
+		s2 = self.stream2(x)
+		s3 = self.stream3(x)
+		s4 = self.stream4(x)
+		y = torch.cat([s1, s2, s3, s4], dim=1)
+		return y
 
 class InceptionB(nn.Module):
-    def __init__(self, in_channels, out_channels):
-        super(InceptionB, self).__init__()
-        mid_channels = out_channels // 4
-        self.stream1 = nn.Sequential(
-            ConvBlock(in_channels, mid_channels, 1),
-            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
-        )
-        self.stream2 = nn.Sequential(
-            ConvBlock(in_channels, mid_channels, 1),
-            ConvBlock(mid_channels, mid_channels, 3, p=1),
-            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
-        )
-        self.stream3 = nn.Sequential(
-            nn.MaxPool2d(3, stride=2, padding=1),
-            ConvBlock(in_channels, mid_channels * 2, 1),
-        )
-
-    def forward(self, x):
-        s1 = self.stream1(x)
-        s2 = self.stream2(x)
-        s3 = self.stream3(x)
-        y = torch.cat([s1, s2, s3], dim=1)
-        return y
-
+	def __init__(self, in_channels, out_channels):
+		"""InceptionB block."""
+		super().__init__()
+		mid_channels = out_channels // 4
+		self.stream1 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+		)
+		self.stream2 = nn.Sequential(
+			ConvBlock(in_channels, mid_channels, 1),
+			ConvBlock(mid_channels, mid_channels, 3, p=1),
+			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+		)
+		self.stream3 = nn.Sequential(
+			nn.MaxPool2d(3, stride=2, padding=1),
+			ConvBlock(in_channels, mid_channels * 2, 1),
+		)
+	def forward(self, x):
+		s1 = self.stream1(x)
+		s2 = self.stream2(x)
+		s3 = self.stream3(x)
+		y = torch.cat([s1, s2, s3], dim=1)
+		return y
 
 class SpatialAttn(nn.Module):
-    def __init__(self):
-        super(SpatialAttn, self).__init__()
-        self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
-        self.conv2 = ConvBlock(1, 1, 1)
-
-    def forward(self, x):
-        x = x.mean(1, keepdim=True)
-        x = self.conv1(x)
-        x = F.interpolate(
-            x,
-            (x.size(2) * 2, x.size(3) * 2),
-            mode="bilinear",
-            align_corners=True,
-        )
-        x = self.conv2(x)
-        return x
-
+	def __init__(self):
+		"""Spatial attention block."""
+		super().__init__()
+		self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
+		self.conv2 = ConvBlock(1, 1, 1)
+	def forward(self, x):
+		x = x.mean(1, keepdim=True)
+		x = self.conv1(x)
+		x = F.interpolate(
+			x, (x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
+		)
+		x = self.conv2(x)
+		return x
 
 class ChannelAttn(nn.Module):
-    def __init__(self, in_channels, reduction_rate=16):
-        super(ChannelAttn, self).__init__()
-        assert in_channels % reduction_rate == 0
-        self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
-        self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
-
-    def forward(self, x):
-        x = F.avg_pool2d(x, x.size()[2:])
-        x = self.conv1(x)
-        x = self.conv2(x)
-        return x
-
+	def __init__(self, in_channels, reduction_rate=16):
+		"""Channel attention block."""
+		super().__init__()
+		assert in_channels % reduction_rate == 0
+		self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
+		self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
+	def forward(self, x):
+		x = F.avg_pool2d(x, x.size()[2:])
+		x = self.conv1(x)
+		x = self.conv2(x)
+		return x
 
 class SoftAttn(nn.Module):
-    def __init__(self, in_channels):
-        super(SoftAttn, self).__init__()
-        self.spatial_attn = SpatialAttn()
-        self.channel_attn = ChannelAttn(in_channels)
-        self.conv = ConvBlock(in_channels, in_channels, 1)
-
-    def forward(self, x):
-        y_spatial = self.spatial_attn(x)
-        y_channel = self.channel_attn(x)
-        y = y_spatial * y_channel
-        y = torch.sigmoid(self.conv(y))
-        return y
-
+	def __init__(self, in_channels):
+		"""Soft attention block."""
+		super().__init__()
+		self.spatial_attn = SpatialAttn()
+		self.channel_attn = ChannelAttn(in_channels)
+		self.conv = ConvBlock(in_channels, in_channels, 1)
+	def forward(self, x):
+		y_spatial = self.spatial_attn(x)
+		y_channel = self.channel_attn(x)
+		y = y_spatial * y_channel
+		y = torch.sigmoid(self.conv(y))
+		return y
 
 class HardAttn(nn.Module):
-    def __init__(self, in_channels):
-        super(HardAttn, self).__init__()
-        self.fc = nn.Linear(in_channels, 4 * 2)
-        self.init_params()
-
-    def init_params(self):
-        self.fc.weight.data.zero_()
-        self.fc.bias.data.copy_(
-            torch.tensor(
-                [0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float
-            )
-        )
-
-    def forward(self, x):
-        x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
-        theta = torch.tanh(self.fc(x))
-        theta = theta.view(-1, 4, 2)
-        return theta
-
+	def __init__(self, in_channels):
+		"""Hard attention block."""
+		super().__init__()
+		self.fc = nn.Linear(in_channels, 4 * 2)
+		self.init_params()
+	def init_params(self):
+		self.fc.weight.data.zero_()
+		self.fc.bias.data.copy_(
+			torch.tensor([0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float)
+		)
+	def forward(self, x):
+		x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
+		theta = torch.tanh(self.fc(x))
+		theta = theta.view(-1, 4, 2)
+		return theta
 
 class HarmAttn(nn.Module):
-    def __init__(self, in_channels):
-        super(HarmAttn, self).__init__()
-        self.soft_attn = SoftAttn(in_channels)
-        self.hard_attn = HardAttn(in_channels)
-
-    def forward(self, x):
-        y_soft_attn = self.soft_attn(x)
-        theta = self.hard_attn(x)
-        return y_soft_attn, theta
-
+	def __init__(self, in_channels):
+		"""Harmonious attention block."""
+		super().__init__()
+		self.soft_attn = SoftAttn(in_channels)
+		self.hard_attn = HardAttn(in_channels)
+	def forward(self, x):
+		y_soft_attn = self.soft_attn(x)
+		theta = self.hard_attn(x)
+		return y_soft_attn, theta
 
 class HACNN(nn.Module):
-    def __init__(
-        self,
-        num_classes,
-        loss="softmax",
-        nchannels=[128, 256, 384],
-        feat_dim=512,
-        learn_region=True,
-        use_gpu=True,
-        **kwargs,
-    ):
-        super(HACNN, self).__init__()
-        self.loss = loss
-        self.learn_region = learn_region
-        self.use_gpu = use_gpu
-        self.conv = ConvBlock(3, 32, 3, s=2, p=1)
-        self.inception1 = nn.Sequential(
-            InceptionA(32, nchannels[0]),
-            InceptionB(nchannels[0], nchannels[0]),
-        )
-        self.ha1 = HarmAttn(nchannels[0])
-        self.inception2 = nn.Sequential(
-            InceptionA(nchannels[0], nchannels[1]),
-            InceptionB(nchannels[1], nchannels[1]),
-        )
-        self.ha2 = HarmAttn(nchannels[1])
-        self.inception3 = nn.Sequential(
-            InceptionA(nchannels[1], nchannels[2]),
-            InceptionB(nchannels[2], nchannels[2]),
-        )
-        self.ha3 = HarmAttn(nchannels[2])
-        self.fc_global = nn.Sequential(
-            nn.Linear(nchannels[2], feat_dim),
-            nn.BatchNorm1d(feat_dim),
-            nn.ReLU(),
-        )
-        self.classifier_global = nn.Linear(feat_dim, num_classes)
-        if self.learn_region:
-            self.init_scale_factors()
-            self.local_conv1 = InceptionB(32, nchannels[0])
-            self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
-            self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
-            self.fc_local = nn.Sequential(
-                nn.Linear(nchannels[2] * 4, feat_dim),
-                nn.BatchNorm1d(feat_dim),
-                nn.ReLU(),
-            )
-            self.classifier_local = nn.Linear(feat_dim, num_classes)
-            self.feat_dim = feat_dim * 2
-        else:
-            self.feat_dim = feat_dim
-
-    def init_scale_factors(self):
-        self.scale_factors = []
-        self.scale_factors.append(
-            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
-        )
-        self.scale_factors.append(
-            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
-        )
-        self.scale_factors.append(
-            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
-        )
-        self.scale_factors.append(
-            torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float)
-        )
-
-    def stn(self, x, theta):
-        grid = F.affine_grid(theta, x.size())
-        x = F.grid_sample(x, grid)
-        return x
-
-    def transform_theta(self, theta_i, region_idx):
-        scale_factors = self.scale_factors[region_idx]
-        theta = torch.zeros(theta_i.size(0), 2, 3)
-        theta[:, :, :2] = scale_factors
-        theta[:, :, -1] = theta_i
-        if self.use_gpu:
-            theta = theta.to(next(self.parameters()).device)
-        return theta
-
-    def forward(self, x):
-        assert x.size(2) == 160 and x.size(3) == 64, (
-            f"Input size does not match, expected (160, 64) but got ({x.size(2)}, {x.size(3)})"
-        )
-        x = self.conv(x)
-        x1 = self.inception1(x)
-        x1_attn, x1_theta = self.ha1(x1)
-        x1_out = x1 * x1_attn
-        if self.learn_region:
-            x1_local_list = []
-            for region_idx in range(4):
-                x1_theta_i = x1_theta[:, region_idx, :]
-                x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
-                x1_trans_i = self.stn(x, x1_theta_i)
-                x1_trans_i = F.interpolate(
-                    x1_trans_i, (24, 28), mode="bilinear", align_corners=True
-                )
-                x1_local_i = self.local_conv1(x1_trans_i)
-                x1_local_list.append(x1_local_i)
-        x2 = self.inception2(x1_out)
-        x2_attn, x2_theta = self.ha2(x2)
-        x2_out = x2 * x2_attn
-        if self.learn_region:
-            x2_local_list = []
-            for region_idx in range(4):
-                x2_theta_i = x2_theta[:, region_idx, :]
-                x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
-                x2_trans_i = self.stn(x1_out, x2_theta_i)
-                x2_trans_i = F.interpolate(
-                    x2_trans_i, (12, 14), mode="bilinear", align_corners=True
-                )
-                x2_local_i = x2_trans_i + x1_local_list[region_idx]
-                x2_local_i = self.local_conv2(x2_local_i)
-                x2_local_list.append(x2_local_i)
-        x3 = self.inception3(x2_out)
-        x3_attn, x3_theta = self.ha3(x3)
-        x3_out = x3 * x3_attn
-        if self.learn_region:
-            x3_local_list = []
-            for region_idx in range(4):
-                x3_theta_i = x3_theta[:, region_idx, :]
-                x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
-                x3_trans_i = self.stn(x2_out, x3_theta_i)
-                x3_trans_i = F.interpolate(
-                    x3_trans_i, (6, 7), mode="bilinear", align_corners=True
-                )
-                x3_local_i = x3_trans_i + x2_local_list[region_idx]
-                x3_local_i = self.local_conv3(x3_local_i)
-                x3_local_list.append(x3_local_i)
-        x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
-            x3_out.size(0), x3_out.size(1)
-        )
-        x_global = self.fc_global(x_global)
-        if self.learn_region:
-            x_local_list = []
-            for region_idx in range(4):
-                x_local_i = x3_local_list[region_idx]
-                x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
-                    x_local_i.size(0), -1
-                )
-                x_local_list.append(x_local_i)
-            x_local = torch.cat(x_local_list, 1)
-            x_local = self.fc_local(x_local)
-        if not self.training:
-            if self.learn_region:
-                x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
-                x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
-                return torch.cat([x_global, x_local], 1)
-            else:
-                return x_global
-        prelogits_global = self.classifier_global(x_global)
-        if self.learn_region:
-            prelogits_local = self.classifier_local(x_local)
-        if self.loss == "softmax":
-            if self.learn_region:
-                return (prelogits_global, prelogits_local)
-            else:
-                return prelogits_global
-        elif self.loss == "triplet":
-            if self.learn_region:
-                return (prelogits_global, prelogits_local), (x_global, x_local)
-            else:
-                return prelogits_global, x_global
-        else:
-            raise KeyError(f"Unsupported loss: {self.loss}")
+	def __init__(
+		self,
+		num_classes,
+		loss="softmax",
+		nchannels=None,
+		feat_dim=512,
+		learn_region=True,
+		use_gpu=True,
+		**kwargs,
+	):
+		"""Harmonious Attention Convolutional Neural Network (HACNN) for person re-identification."""
+		super().__init__()
+		if nchannels is None:
+			nchannels = [128, 256, 384]
+		self.loss = loss
+		self.learn_region = learn_region
+		self.use_gpu = use_gpu
+		self.conv = ConvBlock(3, 32, 3, s=2, p=1)
+		self.inception1 = nn.Sequential(
+			InceptionA(32, nchannels[0]),
+			InceptionB(nchannels[0], nchannels[0]),
+		)
+		self.ha1 = HarmAttn(nchannels[0])
+		self.inception2 = nn.Sequential(
+			InceptionA(nchannels[0], nchannels[1]),
+			InceptionB(nchannels[1], nchannels[1]),
+		)
+		self.ha2 = HarmAttn(nchannels[1])
+		self.inception3 = nn.Sequential(
+			InceptionA(nchannels[1], nchannels[2]),
+			InceptionB(nchannels[2], nchannels[2]),
+		)
+		self.ha3 = HarmAttn(nchannels[2])
+		self.fc_global = nn.Sequential(
+			nn.Linear(nchannels[2], feat_dim),
+			nn.BatchNorm1d(feat_dim),
+			nn.ReLU(),
+		)
+		self.classifier_global = nn.Linear(feat_dim, num_classes)
+		if self.learn_region:
+			self.init_scale_factors()
+			self.local_conv1 = InceptionB(32, nchannels[0])
+			self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
+			self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
+			self.fc_local = nn.Sequential(
+				nn.Linear(nchannels[2] * 4, feat_dim),
+				nn.BatchNorm1d(feat_dim),
+				nn.ReLU(),
+			)
+			self.classifier_local = nn.Linear(feat_dim, num_classes)
+			self.feat_dim = feat_dim * 2
+		else:
+			self.feat_dim = feat_dim
+	def init_scale_factors(self):
+		"""Initialize scale factors for STN."""
+		self.scale_factors = []
+		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
+		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
+		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
+		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
+	def stn(self, x, theta):
+		"""Spatial transformer network."""
+		grid = F.affine_grid(theta, x.size())
+		x = F.grid_sample(x, grid)
+		return x
+	def transform_theta(self, theta_i, region_idx):
+		"""Transform theta for a given region."""
+		scale_factors = self.scale_factors[region_idx]
+		theta = torch.zeros(theta_i.size(0), 2, 3)
+		theta[:, :, :2] = scale_factors
+		theta[:, :, -1] = theta_i
+		if self.use_gpu:
+			theta = theta.to(next(self.parameters()).device)
+		return theta
+	def forward(self, x):
+		"""Forward pass."""
+		assert (
+			x.size(2) == 160 and x.size(3) == 64
+		), (
+			f"Input size does not match, expected (160, 64) but got ({x.size(2)}, {x.size(3)})"
+		)
+		x = self.conv(x)
+		x1 = self.inception1(x)
+		x1_attn, x1_theta = self.ha1(x1)
+		x1_out = x1 * x1_attn
+		if self.learn_region:
+			x1_local_list = []
+			for region_idx in range(4):
+				x1_theta_i = x1_theta[:, region_idx, :]
+				x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
+				x1_trans_i = self.stn(x, x1_theta_i)
+				x1_trans_i = F.interpolate(
+					x1_trans_i, (24, 28), mode="bilinear", align_corners=True
+				)
+				x1_local_i = self.local_conv1(x1_trans_i)
+				x1_local_list.append(x1_local_i)
+		x2 = self.inception2(x1_out)
+		x2_attn, x2_theta = self.ha2(x2)
+		x2_out = x2 * x2_attn
+		if self.learn_region:
+			x2_local_list = []
+			for region_idx in range(4):
+				x2_theta_i = x2_theta[:, region_idx, :]
+				x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
+				x2_trans_i = self.stn(x1_out, x2_theta_i)
+				x2_trans_i = F.interpolate(
+					x2_trans_i, (12, 14), mode="bilinear", align_corners=True
+				)
+				x2_local_i = x2_trans_i + x1_local_list[region_idx]
+				x2_local_i = self.local_conv2(x2_local_i)
+				x2_local_list.append(x2_local_i)
+		x3 = self.inception3(x2_out)
+		x3_attn, x3_theta = self.ha3(x3)
+		x3_out = x3 * x3_attn
+		if self.learn_region:
+			x3_local_list = []
+			for region_idx in range(4):
+				x3_theta_i = x3_theta[:, region_idx, :]
+				x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
+				x3_trans_i = self.stn(x2_out, x3_theta_i)
+				x3_trans_i = F.interpolate(
+					x3_trans_i, (6, 7), mode="bilinear", align_corners=True
+				)
+				x3_local_i = x3_trans_i + x2_local_list[region_idx]
+				x3_local_i = self.local_conv3(x3_local_i)
+				x3_local_list.append(x3_local_i)
+		x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
+			x3_out.size(0), x3_out.size(1)
+		)
+		x_global = self.fc_global(x_global)
+		if self.learn_region:
+			x_local_list = []
+			for region_idx in range(4):
+				x_local_i = x3_local_list[region_idx]
+				x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
+					x_local_i.size(0), -1
+				)
+				x_local_list.append(x_local_i)
+			x_local = torch.cat(x_local_list, 1)
+			x_local = self.fc_local(x_local)
+		if not self.training:
+			if self.learn_region:
+				x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
+				x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
+				return torch.cat([x_global, x_local], 1)
+			else:
+				return x_global
+		prelogits_global = self.classifier_global(x_global)
+		if self.learn_region:
+			prelogits_local = self.classifier_local(x_local)
+		if self.loss == "softmax":
+			if self.learn_region:
+				return (prelogits_global, prelogits_local)
+			else:
+				return prelogits_global
+		elif self.loss == "triplet":
+			if self.learn_region:
+				return (prelogits_global, prelogits_local), (x_global, x_local)
+			else:
+				return prelogits_global, x_global
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
index 8daad863..334bd1c8 100644
--- a/ethology/reid/backbones/mlfn.py
+++ b/ethology/reid/backbones/mlfn.py
@@ -1,5 +1,7 @@
-# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
+"""MLFN backbone for person re-identification."""
+
+from __future__ import absolute_import, division
 import torch
 import torch.utils.model_zoo as model_zoo
 from torch import nn
@@ -7,8 +9,8 @@
 
 __all__ = ["mlfn"]
 model_urls = {
-    # training epoch = 5, top1 = 51.6
-    "imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
+	# training epoch = 5, top1 = 51.6
+	"imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
 }
 
 
diff --git a/ethology/reid/backbones/mobilenetv2.py b/ethology/reid/backbones/mobilenetv2.py
index b3e69186..35a16219 100644
--- a/ethology/reid/backbones/mobilenetv2.py
+++ b/ethology/reid/backbones/mobilenetv2.py
@@ -1,5 +1,6 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
+from __future__ import absolute_import, division
 
 import torch.utils.model_zoo as model_zoo
 from torch import nn
@@ -8,272 +9,239 @@
 __all__ = ["mobilenetv2_x1_0", "mobilenetv2_x1_4"]
 
 model_urls = {
-    # 1.0: top-1 71.3
-    "mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
-    # 1.4: top-1 73.9
-    "mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
+	# 1.0: top-1 71.3
+	"mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
+	# 1.4: top-1 73.9
+	"mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
 }
 
 
 class ConvBlock(nn.Module):
-    """Basic convolutional block.
+	"""Basic convolutional block.
 
-    convolution (bias discarded) + batch normalization + relu6.
+	convolution (bias discarded) + batch normalization + relu6.
 
-    Args:
-            in_c (int): number of input channels.
-            out_c (int): number of output channels.
-            k (int or tuple): kernel size.
-            s (int or tuple): stride.
-            p (int or tuple): padding.
-            g (int): number of blocked connections from input channels
-                    to output channels (default: 1).
+	Args:
+		in_c (int): number of input channels.
+		out_c (int): number of output channels.
+		k (int or tuple): kernel size.
+		s (int or tuple): stride.
+		p (int or tuple): padding.
+		g (int): number of blocked connections from input channels
+			to output channels (default: 1).
+	"""
 
-    """
+	def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
+		super(ConvBlock, self).__init__()
+		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
+		self.bn = nn.BatchNorm2d(out_c)
 
-    def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
-        super(ConvBlock, self).__init__()
-        self.conv = nn.Conv2d(
-            in_c, out_c, k, stride=s, padding=p, bias=False, groups=g
-        )
-        self.bn = nn.BatchNorm2d(out_c)
-
-    def forward(self, x):
-        return F.relu6(self.bn(self.conv(x)))
+	def forward(self, x):
+		return F.relu6(self.bn(self.conv(x)))
 
 
 class Bottleneck(nn.Module):
-    def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
-        super(Bottleneck, self).__init__()
-        mid_channels = in_channels * expansion_factor
-        self.use_residual = stride == 1 and in_channels == out_channels
-        self.conv1 = ConvBlock(in_channels, mid_channels, 1)
-        self.dwconv2 = ConvBlock(
-            mid_channels, mid_channels, 3, stride, 1, g=mid_channels
-        )
-        self.conv3 = nn.Sequential(
-            nn.Conv2d(mid_channels, out_channels, 1, bias=False),
-            nn.BatchNorm2d(out_channels),
-        )
-
-    def forward(self, x):
-        m = self.conv1(x)
-        m = self.dwconv2(m)
-        m = self.conv3(m)
-        if self.use_residual:
-            return x + m
-        else:
-            return m
+	def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
+		super(Bottleneck, self).__init__()
+		mid_channels = in_channels * expansion_factor
+		self.use_residual = stride == 1 and in_channels == out_channels
+		self.conv1 = ConvBlock(in_channels, mid_channels, 1)
+		self.dwconv2 = ConvBlock(
+			mid_channels, mid_channels, 3, stride, 1, g=mid_channels
+		)
+		self.conv3 = nn.Sequential(
+			nn.Conv2d(mid_channels, out_channels, 1, bias=False),
+			nn.BatchNorm2d(out_channels),
+		)
+
+	def forward(self, x):
+		m = self.conv1(x)
+		m = self.dwconv2(m)
+		m = self.conv3(m)
+		if self.use_residual:
+			return x + m
+		else:
+			return m
 
 
 class MobileNetV2(nn.Module):
-    """MobileNetV2.
-
-    Reference:
-            Sandler et al. MobileNetV2: Inverted Residuals and
-            Linear Bottlenecks. CVPR 2018.
-
-    Public keys:
-            - ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
-            - ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
-    """
-
-    def __init__(
-        self,
-        num_classes,
-        width_mult=1,
-        loss="softmax",
-        fc_dims=None,
-        dropout_p=None,
-        **kwargs,
-    ):
-        super(MobileNetV2, self).__init__()
-        self.loss = loss
-        self.in_channels = int(32 * width_mult)
-        self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
-
-        # construct layers
-        self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
-        self.conv2 = self._make_layer(
-            Bottleneck, 1, int(16 * width_mult), 1, 1
-        )
-        self.conv3 = self._make_layer(
-            Bottleneck, 6, int(24 * width_mult), 2, 2
-        )
-        self.conv4 = self._make_layer(
-            Bottleneck, 6, int(32 * width_mult), 3, 2
-        )
-        self.conv5 = self._make_layer(
-            Bottleneck, 6, int(64 * width_mult), 4, 2
-        )
-        self.conv6 = self._make_layer(
-            Bottleneck, 6, int(96 * width_mult), 3, 1
-        )
-        self.conv7 = self._make_layer(
-            Bottleneck, 6, int(160 * width_mult), 3, 2
-        )
-        self.conv8 = self._make_layer(
-            Bottleneck, 6, int(320 * width_mult), 1, 1
-        )
-        self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
-
-        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-        self.fc = self._construct_fc_layer(
-            fc_dims, self.feature_dim, dropout_p
-        )
-        self.classifier = nn.Linear(self.feature_dim, num_classes)
-
-        self._init_params()
-
-    def _make_layer(self, block, t, c, n, s):
-        # t: expansion factor
-        # c: output channels
-        # n: number of blocks
-        # s: stride for first layer
-        layers = []
-        layers.append(block(self.in_channels, c, t, s))
-        self.in_channels = c
-        for i in range(1, n):
-            layers.append(block(self.in_channels, c, t))
-        return nn.Sequential(*layers)
-
-    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-        """Constructs fully connected layer.
-
-        Args:
-                fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
-                input_dim (int): input dimension
-                dropout_p (float): dropout probability, if None, dropout is unused
-
-        """
-        if fc_dims is None:
-            self.feature_dim = input_dim
-            return None
-
-        assert isinstance(fc_dims, (list, tuple)), (
-            f"fc_dims must be either list or tuple, but got {type(fc_dims)}"
-        )
-
-        layers = []
-        for dim in fc_dims:
-            layers.append(nn.Linear(input_dim, dim))
-            layers.append(nn.BatchNorm1d(dim))
-            layers.append(nn.ReLU(inplace=True))
-            if dropout_p is not None:
-                layers.append(nn.Dropout(p=dropout_p))
-            input_dim = dim
-
-        self.feature_dim = fc_dims[-1]
-
-        return nn.Sequential(*layers)
-
-    def _init_params(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(
-                    m.weight, mode="fan_out", nonlinearity="relu"
-                )
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.BatchNorm2d) or isinstance(
-                m, nn.BatchNorm1d
-            ):
-                nn.init.constant_(m.weight, 1)
-                nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, 0, 0.01)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-
-    def featuremaps(self, x):
-        x = self.conv1(x)
-        x = self.conv2(x)
-        x = self.conv3(x)
-        x = self.conv4(x)
-        x = self.conv5(x)
-        x = self.conv6(x)
-        x = self.conv7(x)
-        x = self.conv8(x)
-        x = self.conv9(x)
-        return x
-
-    def forward(self, x):
-        f = self.featuremaps(x)
-        v = self.global_avgpool(f)
-        v = v.view(v.size(0), -1)
-
-        if self.fc is not None:
-            v = self.fc(v)
-
-        if not self.training:
-            return v
-
-        y = self.classifier(v)
-
-        if self.loss == "softmax":
-            return y
-        elif self.loss == "triplet":
-            return y, v
-        else:
-            raise KeyError(f"Unsupported loss: {self.loss}")
+	"""MobileNetV2.
+
+	Reference:
+		Sandler et al. MobileNetV2: Inverted Residuals and
+		Linear Bottlenecks. CVPR 2018.
+
+	Public keys:
+		- ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
+		- ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
+	"""
+
+	def __init__(
+		self,
+		num_classes,
+		width_mult=1,
+		loss="softmax",
+		fc_dims=None,
+		dropout_p=None,
+		**kwargs,
+	):
+		super(MobileNetV2, self).__init__()
+		self.loss = loss
+		self.in_channels = int(32 * width_mult)
+		self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
+
+		# construct layers
+		self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
+		self.conv2 = self._make_layer(Bottleneck, 1, int(16 * width_mult), 1, 1)
+		self.conv3 = self._make_layer(Bottleneck, 6, int(24 * width_mult), 2, 2)
+		self.conv4 = self._make_layer(Bottleneck, 6, int(32 * width_mult), 3, 2)
+		self.conv5 = self._make_layer(Bottleneck, 6, int(64 * width_mult), 4, 2)
+		self.conv6 = self._make_layer(Bottleneck, 6, int(96 * width_mult), 3, 1)
+		self.conv7 = self._make_layer(Bottleneck, 6, int(160 * width_mult), 3, 2)
+		self.conv8 = self._make_layer(Bottleneck, 6, int(320 * width_mult), 1, 1)
+		self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
+
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc = self._construct_fc_layer(fc_dims, self.feature_dim, dropout_p)
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+
+		self._init_params()
+
+	def _make_layer(self, block, t, c, n, s):
+		# t: expansion factor
+		# c: output channels
+		# n: number of blocks
+		# s: stride for first layer
+		layers = []
+		layers.append(block(self.in_channels, c, t, s))
+		self.in_channels = c
+		for i in range(1, n):
+			layers.append(block(self.in_channels, c, t))
+		return nn.Sequential(*layers)
+
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+		"""Constructs fully connected layer.
+
+		Args:
+			fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
+			input_dim (int): input dimension
+			dropout_p (float): dropout probability, if None, dropout is unused
+		"""
+		if fc_dims is None:
+			self.feature_dim = input_dim
+			return None
+
+		assert isinstance(
+			fc_dims, (list, tuple)
+		), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
+
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU(inplace=True))
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+
+		self.feature_dim = fc_dims[-1]
+
+		return nn.Sequential(*layers)
+
+	def _init_params(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+
+	def featuremaps(self, x):
+		x = self.conv1(x)
+		x = self.conv2(x)
+		x = self.conv3(x)
+		x = self.conv4(x)
+		x = self.conv5(x)
+		x = self.conv6(x)
+		x = self.conv7(x)
+		x = self.conv8(x)
+		x = self.conv9(x)
+		return x
+
+	def forward(self, x):
+		f = self.featuremaps(x)
+		v = self.global_avgpool(f)
+		v = v.view(v.size(0), -1)
+
+		if self.fc is not None:
+			v = self.fc(v)
+
+		if not self.training:
+			return v
+
+		y = self.classifier(v)
+
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
 
 
 def init_pretrained_weights(model, model_url):
-    """Initializes model with pretrained weights.
+	"""Initializes model with pretrained weights.
 
-    Layers that don't match with pretrained layers in name or size are kept unchanged.
-    """
-    pretrain_dict = model_zoo.load_url(model_url)
-    model_dict = model.state_dict()
-    pretrain_dict = {
-        k: v
-        for k, v in pretrain_dict.items()
-        if k in model_dict and model_dict[k].size() == v.size()
-    }
-    model_dict.update(pretrain_dict)
-    model.load_state_dict(model_dict)
+	Layers that don't match with pretrained layers in name or size are kept unchanged.
+	"""
+	pretrain_dict = model_zoo.load_url(model_url)
+	model_dict = model.state_dict()
+	pretrain_dict = {
+		k: v
+		for k, v in pretrain_dict.items()
+		if k in model_dict and model_dict[k].size() == v.size()
+	}
+	model_dict.update(pretrain_dict)
+	model.load_state_dict(model_dict)
 
 
 def mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs):
-    model = MobileNetV2(
-        num_classes,
-        loss=loss,
-        width_mult=1,
-        fc_dims=None,
-        dropout_p=None,
-        **kwargs,
-    )
-    if pretrained:
-        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
-        import warnings
-
-        warnings.warn(
-            "The imagenet pretrained weights need to be manually downloaded from {}".format(
-                model_urls["mobilenetv2_x1_0"]
-            )
-        )
-    return model
-
+	model = MobileNetV2(
+		num_classes, loss=loss, width_mult=1, fc_dims=None, dropout_p=None, **kwargs
+	)
+	if pretrained:
+		# init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
+		import warnings
 
-def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):
-    model = MobileNetV2(
-        num_classes,
-        loss=loss,
-        width_mult=1.4,
-        fc_dims=None,
-        dropout_p=None,
-        **kwargs,
-    )
-    if pretrained:
-        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
-        import warnings
-
-        warnings.warn(
-            "The imagenet pretrained weights need to be manually downloaded from {}".format(
-                model_urls["mobilenetv2_x1_4"]
-            )
-        )
-    return model
+		warnings.warn(
+			"The imagenet pretrained weights need to be manually downloaded from {}".format(
+				model_urls["mobilenetv2_x1_0"]
+			)
+		)
+	return model
 
 
+def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):
+	model = MobileNetV2(
+		num_classes, loss=loss, width_mult=1.4, fc_dims=None, dropout_p=None, **kwargs
+	)
+	if pretrained:
+		# init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
+		import warnings
+
+		warnings.warn(
+			"The imagenet pretrained weights need to be manually downloaded from {}".format(
+				model_urls["mobilenetv2_x1_4"]
+			)
+		)
+	return model
 # Copied from boxmot/boxmot/reid/backbones/mobilenetv2.py
diff --git a/ethology/reid/backbones/osnet.py b/ethology/reid/backbones/osnet.py
index c13dd5b7..c07e4e45 100644
--- a/ethology/reid/backbones/osnet.py
+++ b/ethology/reid/backbones/osnet.py
@@ -1,5 +1,6 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
+from __future__ import absolute_import, division
 
 import warnings
 
@@ -7,529 +8,338 @@
 from torch import nn
 from torch.nn import functional as F
 
-__all__ = [
-    "osnet_x1_0",
-    "osnet_x0_75",
-    "osnet_x0_5",
-    "osnet_x0_25",
-    "osnet_ibn_x1_0",
-]
+__all__ = ["osnet_x1_0", "osnet_x0_75", "osnet_x0_5", "osnet_x0_25", "osnet_ibn_x1_0"]
 
 pretrained_urls = {
-    "osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
-    "osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
-    "osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
-    "osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
-    "osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
+	"osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
+	"osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
+	"osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
+	"osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
+	"osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
 }
 
 # ...existing code for ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, ChannelGate, OSBlock...
 
-
 class ConvLayer(nn.Module):
-    def __init__(
-        self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        groups=1,
-        IN=False,
-    ):
-        super(ConvLayer, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=stride,
-            padding=padding,
-            bias=False,
-            groups=groups,
-        )
-        if IN:
-            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
-        else:
-            self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU(inplace=True)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        x = self.relu(x)
-        return x
-
+	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
+		super(ConvLayer, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups)
+		if IN:
+			self.bn = nn.InstanceNorm2d(out_channels, affine=True)
+		else:
+			self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		x = self.relu(x)
+		return x
 
 class Conv1x1(nn.Module):
-    def __init__(self, in_channels, out_channels, stride=1, groups=1):
-        super(Conv1x1, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            1,
-            stride=stride,
-            padding=0,
-            bias=False,
-            groups=groups,
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU(inplace=True)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        x = self.relu(x)
-        return x
-
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super(Conv1x1, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		x = self.relu(x)
+		return x
 
 class Conv1x1Linear(nn.Module):
-    def __init__(self, in_channels, out_channels, stride=1):
-        super(Conv1x1Linear, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        return x
-
+	def __init__(self, in_channels, out_channels, stride=1):
+		super(Conv1x1Linear, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
+		self.bn = nn.BatchNorm2d(out_channels)
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		return x
 
 class Conv3x3(nn.Module):
-    def __init__(self, in_channels, out_channels, stride=1, groups=1):
-        super(Conv3x3, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            3,
-            stride=stride,
-            padding=1,
-            bias=False,
-            groups=groups,
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU(inplace=True)
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        x = self.relu(x)
-        return x
-
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super(Conv3x3, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		x = self.relu(x)
+		return x
 
 class LightConv3x3(nn.Module):
-    def __init__(self, in_channels, out_channels):
-        super(LightConv3x3, self).__init__()
-        self.conv1 = nn.Conv2d(
-            in_channels, out_channels, 1, stride=1, padding=0, bias=False
-        )
-        self.conv2 = nn.Conv2d(
-            out_channels,
-            out_channels,
-            3,
-            stride=1,
-            padding=1,
-            bias=False,
-            groups=out_channels,
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU(inplace=True)
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x = self.conv2(x)
-        x = self.bn(x)
-        x = self.relu(x)
-        return x
-
+	def __init__(self, in_channels, out_channels):
+		super(LightConv3x3, self).__init__()
+		self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
+		self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU(inplace=True)
+	def forward(self, x):
+		x = self.conv1(x)
+		x = self.conv2(x)
+		x = self.bn(x)
+		x = self.relu(x)
+		return x
 
 class ChannelGate(nn.Module):
-    def __init__(
-        self,
-        in_channels,
-        num_gates=None,
-        return_gates=False,
-        gate_activation="sigmoid",
-        reduction=16,
-        layer_norm=False,
-    ):
-        super(ChannelGate, self).__init__()
-        if num_gates is None:
-            num_gates = in_channels
-        self.return_gates = return_gates
-        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-        self.fc1 = nn.Conv2d(
-            in_channels,
-            in_channels // reduction,
-            kernel_size=1,
-            bias=True,
-            padding=0,
-        )
-        self.norm1 = None
-        if layer_norm:
-            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
-        self.relu = nn.ReLU(inplace=True)
-        self.fc2 = nn.Conv2d(
-            in_channels // reduction,
-            num_gates,
-            kernel_size=1,
-            bias=True,
-            padding=0,
-        )
-        if gate_activation == "sigmoid":
-            self.gate_activation = nn.Sigmoid()
-        elif gate_activation == "relu":
-            self.gate_activation = nn.ReLU(inplace=True)
-        elif gate_activation == "linear":
-            self.gate_activation = None
-        else:
-            raise RuntimeError(f"Unknown gate activation: {gate_activation}")
-
-    def forward(self, x):
-        input = x
-        x = self.global_avgpool(x)
-        x = self.fc1(x)
-        if self.norm1 is not None:
-            x = self.norm1(x)
-        x = self.relu(x)
-        x = self.fc2(x)
-        if self.gate_activation is not None:
-            x = self.gate_activation(x)
-        if self.return_gates:
-            return x
-        return input * x
-
+	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
+		super(ChannelGate, self).__init__()
+		if num_gates is None:
+			num_gates = in_channels
+		self.return_gates = return_gates
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0)
+		self.norm1 = None
+		if layer_norm:
+			self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
+		self.relu = nn.ReLU(inplace=True)
+		self.fc2 = nn.Conv2d(in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0)
+		if gate_activation == "sigmoid":
+			self.gate_activation = nn.Sigmoid()
+		elif gate_activation == "relu":
+			self.gate_activation = nn.ReLU(inplace=True)
+		elif gate_activation == "linear":
+			self.gate_activation = None
+		else:
+			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
+	def forward(self, x):
+		input = x
+		x = self.global_avgpool(x)
+		x = self.fc1(x)
+		if self.norm1 is not None:
+			x = self.norm1(x)
+		x = self.relu(x)
+		x = self.fc2(x)
+		if self.gate_activation is not None:
+			x = self.gate_activation(x)
+		if self.return_gates:
+			return x
+		return input * x
 
 class OSBlock(nn.Module):
-    def __init__(
-        self,
-        in_channels,
-        out_channels,
-        IN=False,
-        bottleneck_reduction=4,
-        **kwargs,
-    ):
-        super(OSBlock, self).__init__()
-        mid_channels = out_channels // bottleneck_reduction
-        self.conv1 = Conv1x1(in_channels, mid_channels)
-        self.conv2a = LightConv3x3(mid_channels, mid_channels)
-        self.conv2b = nn.Sequential(
-            LightConv3x3(mid_channels, mid_channels),
-            LightConv3x3(mid_channels, mid_channels),
-        )
-        self.conv2c = nn.Sequential(
-            LightConv3x3(mid_channels, mid_channels),
-            LightConv3x3(mid_channels, mid_channels),
-            LightConv3x3(mid_channels, mid_channels),
-        )
-        self.conv2d = nn.Sequential(
-            LightConv3x3(mid_channels, mid_channels),
-            LightConv3x3(mid_channels, mid_channels),
-            LightConv3x3(mid_channels, mid_channels),
-            LightConv3x3(mid_channels, mid_channels),
-        )
-        self.gate = ChannelGate(mid_channels)
-        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
-        self.downsample = None
-        if in_channels != out_channels:
-            self.downsample = Conv1x1Linear(in_channels, out_channels)
-        self.IN = None
-        if IN:
-            self.IN = nn.InstanceNorm2d(out_channels, affine=True)
-
-    def forward(self, x):
-        identity = x
-        x1 = self.conv1(x)
-        x2a = self.conv2a(x1)
-        x2b = self.conv2b(x1)
-        x2c = self.conv2c(x1)
-        x2d = self.conv2d(x1)
-        x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
-        x3 = self.conv3(x2)
-        if self.downsample is not None:
-            identity = self.downsample(identity)
-        out = x3 + identity
-        if self.IN is not None:
-            out = self.IN(out)
-        return F.relu(out)
-
+	def __init__(self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs):
+		super(OSBlock, self).__init__()
+		mid_channels = out_channels // bottleneck_reduction
+		self.conv1 = Conv1x1(in_channels, mid_channels)
+		self.conv2a = LightConv3x3(mid_channels, mid_channels)
+		self.conv2b = nn.Sequential(
+			LightConv3x3(mid_channels, mid_channels),
+			LightConv3x3(mid_channels, mid_channels),
+		)
+		self.conv2c = nn.Sequential(
+			LightConv3x3(mid_channels, mid_channels),
+			LightConv3x3(mid_channels, mid_channels),
+			LightConv3x3(mid_channels, mid_channels),
+		)
+		self.conv2d = nn.Sequential(
+			LightConv3x3(mid_channels, mid_channels),
+			LightConv3x3(mid_channels, mid_channels),
+			LightConv3x3(mid_channels, mid_channels),
+			LightConv3x3(mid_channels, mid_channels),
+		)
+		self.gate = ChannelGate(mid_channels)
+		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+		self.downsample = None
+		if in_channels != out_channels:
+			self.downsample = Conv1x1Linear(in_channels, out_channels)
+		self.IN = None
+		if IN:
+			self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+	def forward(self, x):
+		identity = x
+		x1 = self.conv1(x)
+		x2a = self.conv2a(x1)
+		x2b = self.conv2b(x1)
+		x2c = self.conv2c(x1)
+		x2d = self.conv2d(x1)
+		x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
+		x3 = self.conv3(x2)
+		if self.downsample is not None:
+			identity = self.downsample(identity)
+		out = x3 + identity
+		if self.IN is not None:
+			out = self.IN(out)
+		return F.relu(out)
 
 class OSNet(nn.Module):
-    def __init__(
-        self,
-        num_classes,
-        blocks,
-        layers,
-        channels,
-        feature_dim=512,
-        loss="softmax",
-        IN=False,
-        **kwargs,
-    ):
-        super(OSNet, self).__init__()
-        num_blocks = len(blocks)
-        assert num_blocks == len(layers)
-        assert num_blocks == len(channels) - 1
-        self.loss = loss
-        self.feature_dim = feature_dim
-        self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
-        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-        self.conv2 = self._make_layer(
-            blocks[0],
-            layers[0],
-            channels[0],
-            channels[1],
-            reduce_spatial_size=True,
-            IN=IN,
-        )
-        self.conv3 = self._make_layer(
-            blocks[1],
-            layers[1],
-            channels[1],
-            channels[2],
-            reduce_spatial_size=True,
-        )
-        self.conv4 = self._make_layer(
-            blocks[2],
-            layers[2],
-            channels[2],
-            channels[3],
-            reduce_spatial_size=False,
-        )
-        self.conv5 = Conv1x1(channels[3], channels[3])
-        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-        self.fc = self._construct_fc_layer(
-            self.feature_dim, channels[3], dropout_p=None
-        )
-        self.classifier = nn.Linear(self.feature_dim, num_classes)
-        self._init_params()
-
-    def _make_layer(
-        self,
-        block,
-        layer,
-        in_channels,
-        out_channels,
-        reduce_spatial_size,
-        IN=False,
-    ):
-        layers = []
-        layers.append(block(in_channels, out_channels, IN=IN))
-        for i in range(1, layer):
-            layers.append(block(out_channels, out_channels, IN=IN))
-        if reduce_spatial_size:
-            layers.append(
-                nn.Sequential(
-                    Conv1x1(out_channels, out_channels),
-                    nn.AvgPool2d(2, stride=2),
-                )
-            )
-        return nn.Sequential(*layers)
-
-    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-        if fc_dims is None or fc_dims < 0:
-            self.feature_dim = input_dim
-            return None
-        if isinstance(fc_dims, int):
-            fc_dims = [fc_dims]
-        layers = []
-        for dim in fc_dims:
-            layers.append(nn.Linear(input_dim, dim))
-            layers.append(nn.BatchNorm1d(dim))
-            layers.append(nn.ReLU(inplace=True))
-            if dropout_p is not None:
-                layers.append(nn.Dropout(p=dropout_p))
-            input_dim = dim
-        self.feature_dim = fc_dims[-1]
-        return nn.Sequential(*layers)
-
-    def _init_params(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(
-                    m.weight, mode="fan_out", nonlinearity="relu"
-                )
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.BatchNorm2d) or isinstance(
-                m, nn.BatchNorm1d
-            ):
-                nn.init.constant_(m.weight, 1)
-                nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, 0, 0.01)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-
-    def featuremaps(self, x):
-        x = self.conv1(x)
-        x = self.maxpool(x)
-        x = self.conv2(x)
-        x = self.conv3(x)
-        x = self.conv4(x)
-        x = self.conv5(x)
-        return x
-
-    def forward(self, x, return_featuremaps=False):
-        x = self.featuremaps(x)
-        if return_featuremaps:
-            return x
-        v = self.global_avgpool(x)
-        v = v.view(v.size(0), -1)
-        if self.fc is not None:
-            v = self.fc(v)
-        if not self.training:
-            return v
-        y = self.classifier(v)
-        if self.loss == "softmax":
-            return y
-        elif self.loss == "triplet":
-            return y, v
-        else:
-            raise KeyError(f"Unsupported loss: {self.loss}")
-
+	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", IN=False, **kwargs):
+		super(OSNet, self).__init__()
+		num_blocks = len(blocks)
+		assert num_blocks == len(layers)
+		assert num_blocks == len(channels) - 1
+		self.loss = loss
+		self.feature_dim = feature_dim
+		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
+		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN)
+		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True)
+		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False)
+		self.conv5 = Conv1x1(channels[3], channels[3])
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+		self._init_params()
+	def _make_layer(self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False):
+		layers = []
+		layers.append(block(in_channels, out_channels, IN=IN))
+		for i in range(1, layer):
+			layers.append(block(out_channels, out_channels, IN=IN))
+		if reduce_spatial_size:
+			layers.append(nn.Sequential(Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2)))
+		return nn.Sequential(*layers)
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+		if fc_dims is None or fc_dims < 0:
+			self.feature_dim = input_dim
+			return None
+		if isinstance(fc_dims, int):
+			fc_dims = [fc_dims]
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU(inplace=True))
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+		self.feature_dim = fc_dims[-1]
+		return nn.Sequential(*layers)
+	def _init_params(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+	def featuremaps(self, x):
+		x = self.conv1(x)
+		x = self.maxpool(x)
+		x = self.conv2(x)
+		x = self.conv3(x)
+		x = self.conv4(x)
+		x = self.conv5(x)
+		return x
+	def forward(self, x, return_featuremaps=False):
+		x = self.featuremaps(x)
+		if return_featuremaps:
+			return x
+		v = self.global_avgpool(x)
+		v = v.view(v.size(0), -1)
+		if self.fc is not None:
+			v = self.fc(v)
+		if not self.training:
+			return v
+		y = self.classifier(v)
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
 
 def init_pretrained_weights(model, key=""):
-    import os
-    from collections import OrderedDict
-
-    import gdown
-
-    def _get_torch_home():
-        ENV_TORCH_HOME = "TORCH_HOME"
-        ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
-        DEFAULT_CACHE_DIR = "~/.cache"
-        torch_home = os.path.expanduser(
-            os.getenv(
-                ENV_TORCH_HOME,
-                os.path.join(
-                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"
-                ),
-            )
-        )
-        return torch_home
-
-    filename = key + "_imagenet.pth"
-    # Try ethology/models/ directory first
-    ethology_root = os.path.abspath(
-        os.path.join(os.path.dirname(__file__), "../../../")
-    )
-    models_dir = os.path.join(ethology_root, "models")
-    os.makedirs(models_dir, exist_ok=True)
-    local_file = os.path.join(models_dir, filename)
-    torch_home = _get_torch_home()
-    model_dir = os.path.join(torch_home, "checkpoints")
-    os.makedirs(model_dir, exist_ok=True)
-    cached_file = os.path.join(model_dir, filename)
-    # Prefer ethology/models/ directory file if present
-    if os.path.exists(local_file):
-        print(f"[OSNet] Loading model weights from {local_file}")
-        cached_file = local_file
-    elif os.path.exists(cached_file):
-        print(f"[OSNet] Loading model weights from {cached_file}")
-    else:
-        print(f"[OSNet] Downloading model weights to {cached_file}")
-        gdown.download(pretrained_urls[key], cached_file, quiet=False)
-    state_dict = torch.load(cached_file)
-    model_dict = model.state_dict()
-    new_state_dict = OrderedDict()
-    matched_layers, discarded_layers = [], []
-    for k, v in state_dict.items():
-        if k.startswith("module."):
-            k = k[7:]
-        if k in model_dict and model_dict[k].size() == v.size():
-            new_state_dict[k] = v
-            matched_layers.append(k)
-        else:
-            discarded_layers.append(k)
-    model_dict.update(new_state_dict)
-    model.load_state_dict(model_dict)
-    if len(matched_layers) == 0:
-        warnings.warn(
-            f'The pretrained weights from "{cached_file}" cannot be loaded, '
-            "please check the key names manually "
-            "(** ignored and continue **)"
-        )
-    else:
-        print(
-            f'Successfully loaded imagenet pretrained weights from "{cached_file}"'
-        )
-        if len(discarded_layers) > 0:
-            print(
-                "** The following layers are discarded "
-                f"due to unmatched keys or layer size: {discarded_layers}"
-            )
-
+	import errno
+	import os
+	from collections import OrderedDict
+	import gdown
+	def _get_torch_home():
+		ENV_TORCH_HOME = "TORCH_HOME"
+		ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
+		DEFAULT_CACHE_DIR = "~/.cache"
+		torch_home = os.path.expanduser(
+			os.getenv(
+				ENV_TORCH_HOME,
+				os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"),
+			)
+		)
+		return torch_home
+	filename = key + "_imagenet.pth"
+	# Try ethology/models/ directory first
+	ethology_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
+	models_dir = os.path.join(ethology_root, "models")
+	os.makedirs(models_dir, exist_ok=True)
+	local_file = os.path.join(models_dir, filename)
+	torch_home = _get_torch_home()
+	model_dir = os.path.join(torch_home, "checkpoints")
+	os.makedirs(model_dir, exist_ok=True)
+	cached_file = os.path.join(model_dir, filename)
+	# Prefer ethology/models/ directory file if present
+	if os.path.exists(local_file):
+		print(f"[OSNet] Loading model weights from {local_file}")
+		cached_file = local_file
+	elif os.path.exists(cached_file):
+		print(f"[OSNet] Loading model weights from {cached_file}")
+	else:
+		print(f"[OSNet] Downloading model weights to {cached_file}")
+		gdown.download(pretrained_urls[key], cached_file, quiet=False)
+	state_dict = torch.load(cached_file)
+	model_dict = model.state_dict()
+	new_state_dict = OrderedDict()
+	matched_layers, discarded_layers = [], []
+	for k, v in state_dict.items():
+		if k.startswith("module."):
+			k = k[7:]
+		if k in model_dict and model_dict[k].size() == v.size():
+			new_state_dict[k] = v
+			matched_layers.append(k)
+		else:
+			discarded_layers.append(k)
+	model_dict.update(new_state_dict)
+	model.load_state_dict(model_dict)
+	if len(matched_layers) == 0:
+		warnings.warn(
+			'The pretrained weights from "{}" cannot be loaded, '
+			"please check the key names manually "
+			"(** ignored and continue **)".format(cached_file)
+		)
+	else:
+		print(
+			'Successfully loaded imagenet pretrained weights from "{}"'.format(
+				cached_file
+			)
+		)
+		if len(discarded_layers) > 0:
+			print(
+				"** The following layers are discarded "
+				"due to unmatched keys or layer size: {}".format(discarded_layers)
+			)
 
 def osnet_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-    model = OSNet(
-        num_classes,
-        blocks=[OSBlock, OSBlock, OSBlock],
-        layers=[2, 2, 2],
-        channels=[64, 256, 384, 512],
-        loss=loss,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_x1_0")
-    return model
-
+	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_x1_0")
+	return model
 
 def osnet_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-    model = OSNet(
-        num_classes,
-        blocks=[OSBlock, OSBlock, OSBlock],
-        layers=[2, 2, 2],
-        channels=[48, 192, 288, 384],
-        loss=loss,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_x0_75")
-    return model
-
+	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[48, 192, 288, 384], loss=loss, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_x0_75")
+	return model
 
 def osnet_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-    model = OSNet(
-        num_classes,
-        blocks=[OSBlock, OSBlock, OSBlock],
-        layers=[2, 2, 2],
-        channels=[32, 128, 192, 256],
-        loss=loss,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_x0_5")
-    return model
-
+	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[32, 128, 192, 256], loss=loss, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_x0_5")
+	return model
 
 def osnet_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-    model = OSNet(
-        num_classes,
-        blocks=[OSBlock, OSBlock, OSBlock],
-        layers=[2, 2, 2],
-        channels=[16, 64, 96, 128],
-        loss=loss,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_x0_25")
-    return model
-
-
-def osnet_ibn_x1_0(
-    num_classes=1000, pretrained=True, loss="softmax", **kwargs
-):
-    model = OSNet(
-        num_classes,
-        blocks=[OSBlock, OSBlock, OSBlock],
-        layers=[2, 2, 2],
-        channels=[64, 256, 384, 512],
-        loss=loss,
-        IN=True,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_ibn_x1_0")
-    return model
+	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[16, 64, 96, 128], loss=loss, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_x0_25")
+	return model
+
+def osnet_ibn_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, IN=True, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_ibn_x1_0")
+	return model
diff --git a/ethology/reid/backbones/osnet_ain.py b/ethology/reid/backbones/osnet_ain.py
index 2ef3da25..9e052209 100644
--- a/ethology/reid/backbones/osnet_ain.py
+++ b/ethology/reid/backbones/osnet_ain.py
@@ -1,5 +1,6 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
+from __future__ import absolute_import, division
 
 import warnings
 
@@ -7,541 +8,349 @@
 from torch import nn
 from torch.nn import functional as F
 
-__all__ = [
-    "osnet_ain_x1_0",
-    "osnet_ain_x0_75",
-    "osnet_ain_x0_5",
-    "osnet_ain_x0_25",
-]
+__all__ = ["osnet_ain_x1_0", "osnet_ain_x0_75", "osnet_ain_x0_5", "osnet_ain_x0_25"]
 
 pretrained_urls = {
-    "osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
-    "osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
-    "osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
-    "osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
+	"osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
+	"osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
+	"osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
+	"osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
 }
 
 # ...existing code for ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, LightConvStream, ChannelGate, OSBlock, OSBlockINin, OSNet, init_pretrained_weights, and instantiation functions...
 
-
 class ConvLayer(nn.Module):
-    def __init__(
-        self,
-        in_channels,
-        out_channels,
-        kernel_size,
-        stride=1,
-        padding=0,
-        groups=1,
-        IN=False,
-    ):
-        super(ConvLayer, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            kernel_size,
-            stride=stride,
-            padding=padding,
-            bias=False,
-            groups=groups,
-        )
-        if IN:
-            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
-        else:
-            self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        return self.relu(x)
-
+	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
+		super(ConvLayer, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups)
+		if IN:
+			self.bn = nn.InstanceNorm2d(out_channels, affine=True)
+		else:
+			self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		return self.relu(x)
 
 class Conv1x1(nn.Module):
-    def __init__(self, in_channels, out_channels, stride=1, groups=1):
-        super(Conv1x1, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            1,
-            stride=stride,
-            padding=0,
-            bias=False,
-            groups=groups,
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        return self.relu(x)
-
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super(Conv1x1, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		return self.relu(x)
 
 class Conv1x1Linear(nn.Module):
-    def __init__(self, in_channels, out_channels, stride=1, bn=True):
-        super(Conv1x1Linear, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
-        )
-        self.bn = None
-        if bn:
-            self.bn = nn.BatchNorm2d(out_channels)
-
-    def forward(self, x):
-        x = self.conv(x)
-        if self.bn is not None:
-            x = self.bn(x)
-        return x
-
+	def __init__(self, in_channels, out_channels, stride=1, bn=True):
+		super(Conv1x1Linear, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
+		self.bn = None
+		if bn:
+			self.bn = nn.BatchNorm2d(out_channels)
+	def forward(self, x):
+		x = self.conv(x)
+		if self.bn is not None:
+			x = self.bn(x)
+		return x
 
 class Conv3x3(nn.Module):
-    def __init__(self, in_channels, out_channels, stride=1, groups=1):
-        super(Conv3x3, self).__init__()
-        self.conv = nn.Conv2d(
-            in_channels,
-            out_channels,
-            3,
-            stride=stride,
-            padding=1,
-            bias=False,
-            groups=groups,
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        x = self.conv(x)
-        x = self.bn(x)
-        return self.relu(x)
-
+	def __init__(self, in_channels, out_channels, stride=1, groups=1):
+		super(Conv3x3, self).__init__()
+		self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		x = self.conv(x)
+		x = self.bn(x)
+		return self.relu(x)
 
 class LightConv3x3(nn.Module):
-    def __init__(self, in_channels, out_channels):
-        super(LightConv3x3, self).__init__()
-        self.conv1 = nn.Conv2d(
-            in_channels, out_channels, 1, stride=1, padding=0, bias=False
-        )
-        self.conv2 = nn.Conv2d(
-            out_channels,
-            out_channels,
-            3,
-            stride=1,
-            padding=1,
-            bias=False,
-            groups=out_channels,
-        )
-        self.bn = nn.BatchNorm2d(out_channels)
-        self.relu = nn.ReLU()
-
-    def forward(self, x):
-        x = self.conv1(x)
-        x = self.conv2(x)
-        x = self.bn(x)
-        return self.relu(x)
-
+	def __init__(self, in_channels, out_channels):
+		super(LightConv3x3, self).__init__()
+		self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
+		self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels)
+		self.bn = nn.BatchNorm2d(out_channels)
+		self.relu = nn.ReLU()
+	def forward(self, x):
+		x = self.conv1(x)
+		x = self.conv2(x)
+		x = self.bn(x)
+		return self.relu(x)
 
 class LightConvStream(nn.Module):
-    def __init__(self, in_channels, out_channels, depth):
-        super(LightConvStream, self).__init__()
-        assert depth >= 1
-        layers = [LightConv3x3(in_channels, out_channels)]
-        for i in range(depth - 1):
-            layers.append(LightConv3x3(out_channels, out_channels))
-        self.layers = nn.Sequential(*layers)
-
-    def forward(self, x):
-        return self.layers(x)
-
+	def __init__(self, in_channels, out_channels, depth):
+		super(LightConvStream, self).__init__()
+		assert depth >= 1
+		layers = [LightConv3x3(in_channels, out_channels)]
+		for i in range(depth - 1):
+			layers.append(LightConv3x3(out_channels, out_channels))
+		self.layers = nn.Sequential(*layers)
+	def forward(self, x):
+		return self.layers(x)
 
 class ChannelGate(nn.Module):
-    def __init__(
-        self,
-        in_channels,
-        num_gates=None,
-        return_gates=False,
-        gate_activation="sigmoid",
-        reduction=16,
-        layer_norm=False,
-    ):
-        super(ChannelGate, self).__init__()
-        if num_gates is None:
-            num_gates = in_channels
-        self.return_gates = return_gates
-        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-        self.fc1 = nn.Conv2d(
-            in_channels,
-            in_channels // reduction,
-            kernel_size=1,
-            bias=True,
-            padding=0,
-        )
-        self.norm1 = None
-        if layer_norm:
-            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
-        self.relu = nn.ReLU()
-        self.fc2 = nn.Conv2d(
-            in_channels // reduction,
-            num_gates,
-            kernel_size=1,
-            bias=True,
-            padding=0,
-        )
-        if gate_activation == "sigmoid":
-            self.gate_activation = nn.Sigmoid()
-        elif gate_activation == "relu":
-            self.gate_activation = nn.ReLU()
-        elif gate_activation == "linear":
-            self.gate_activation = None
-        else:
-            raise RuntimeError(f"Unknown gate activation: {gate_activation}")
-
-    def forward(self, x):
-        input = x
-        x = self.global_avgpool(x)
-        x = self.fc1(x)
-        if self.norm1 is not None:
-            x = self.norm1(x)
-        x = self.relu(x)
-        x = self.fc2(x)
-        if self.gate_activation is not None:
-            x = self.gate_activation(x)
-        if self.return_gates:
-            return x
-        return input * x
-
+	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
+		super(ChannelGate, self).__init__()
+		if num_gates is None:
+			num_gates = in_channels
+		self.return_gates = return_gates
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0)
+		self.norm1 = None
+		if layer_norm:
+			self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
+		self.relu = nn.ReLU()
+		self.fc2 = nn.Conv2d(in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0)
+		if gate_activation == "sigmoid":
+			self.gate_activation = nn.Sigmoid()
+		elif gate_activation == "relu":
+			self.gate_activation = nn.ReLU()
+		elif gate_activation == "linear":
+			self.gate_activation = None
+		else:
+			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
+	def forward(self, x):
+		input = x
+		x = self.global_avgpool(x)
+		x = self.fc1(x)
+		if self.norm1 is not None:
+			x = self.norm1(x)
+		x = self.relu(x)
+		x = self.fc2(x)
+		if self.gate_activation is not None:
+			x = self.gate_activation(x)
+		if self.return_gates:
+			return x
+		return input * x
 
 class OSBlock(nn.Module):
-    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
-        super(OSBlock, self).__init__()
-        assert T >= 1
-        assert out_channels >= reduction and out_channels % reduction == 0
-        mid_channels = out_channels // reduction
-        self.conv1 = Conv1x1(in_channels, mid_channels)
-        self.conv2 = nn.ModuleList(
-            [
-                LightConvStream(mid_channels, mid_channels, t)
-                for t in range(1, T + 1)
-            ]
-        )
-        self.gate = ChannelGate(mid_channels)
-        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
-        self.downsample = None
-        if in_channels != out_channels:
-            self.downsample = Conv1x1Linear(in_channels, out_channels)
-
-    def forward(self, x):
-        identity = x
-        x1 = self.conv1(x)
-        x2 = 0
-        for conv2_t in self.conv2:
-            x2_t = conv2_t(x1)
-            x2 = x2 + self.gate(x2_t)
-        x3 = self.conv3(x2)
-        if self.downsample is not None:
-            identity = self.downsample(identity)
-        out = x3 + identity
-        return F.relu(out)
-
+	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+		super(OSBlock, self).__init__()
+		assert T >= 1
+		assert out_channels >= reduction and out_channels % reduction == 0
+		mid_channels = out_channels // reduction
+		self.conv1 = Conv1x1(in_channels, mid_channels)
+		self.conv2 = nn.ModuleList([LightConvStream(mid_channels, mid_channels, t) for t in range(1, T + 1)])
+		self.gate = ChannelGate(mid_channels)
+		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+		self.downsample = None
+		if in_channels != out_channels:
+			self.downsample = Conv1x1Linear(in_channels, out_channels)
+	def forward(self, x):
+		identity = x
+		x1 = self.conv1(x)
+		x2 = 0
+		for conv2_t in self.conv2:
+			x2_t = conv2_t(x1)
+			x2 = x2 + self.gate(x2_t)
+		x3 = self.conv3(x2)
+		if self.downsample is not None:
+			identity = self.downsample(identity)
+		out = x3 + identity
+		return F.relu(out)
 
 class OSBlockINin(nn.Module):
-    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
-        super(OSBlockINin, self).__init__()
-        assert T >= 1
-        assert out_channels >= reduction and out_channels % reduction == 0
-        mid_channels = out_channels // reduction
-        self.conv1 = Conv1x1(in_channels, mid_channels)
-        self.conv2 = nn.ModuleList(
-            [
-                LightConvStream(mid_channels, mid_channels, t)
-                for t in range(1, T + 1)
-            ]
-        )
-        self.gate = ChannelGate(mid_channels)
-        self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
-        self.downsample = None
-        if in_channels != out_channels:
-            self.downsample = Conv1x1Linear(in_channels, out_channels)
-        self.IN = nn.InstanceNorm2d(out_channels, affine=True)
-
-    def forward(self, x):
-        identity = x
-        x1 = self.conv1(x)
-        x2 = 0
-        for conv2_t in self.conv2:
-            x2_t = conv2_t(x1)
-            x2 = x2 + self.gate(x2_t)
-        x3 = self.conv3(x2)
-        x3 = self.IN(x3)
-        if self.downsample is not None:
-            identity = self.downsample(identity)
-        out = x3 + identity
-        return F.relu(out)
-
+	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+		super(OSBlockINin, self).__init__()
+		assert T >= 1
+		assert out_channels >= reduction and out_channels % reduction == 0
+		mid_channels = out_channels // reduction
+		self.conv1 = Conv1x1(in_channels, mid_channels)
+		self.conv2 = nn.ModuleList([LightConvStream(mid_channels, mid_channels, t) for t in range(1, T + 1)])
+		self.gate = ChannelGate(mid_channels)
+		self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
+		self.downsample = None
+		if in_channels != out_channels:
+			self.downsample = Conv1x1Linear(in_channels, out_channels)
+		self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+	def forward(self, x):
+		identity = x
+		x1 = self.conv1(x)
+		x2 = 0
+		for conv2_t in self.conv2:
+			x2_t = conv2_t(x1)
+			x2 = x2 + self.gate(x2_t)
+		x3 = self.conv3(x2)
+		x3 = self.IN(x3)
+		if self.downsample is not None:
+			identity = self.downsample(identity)
+		out = x3 + identity
+		return F.relu(out)
 
 class OSNet(nn.Module):
-    def __init__(
-        self,
-        num_classes,
-        blocks,
-        layers,
-        channels,
-        feature_dim=512,
-        loss="softmax",
-        conv1_IN=False,
-        **kwargs,
-    ):
-        super(OSNet, self).__init__()
-        num_blocks = len(blocks)
-        assert num_blocks == len(layers)
-        assert num_blocks == len(channels) - 1
-        self.loss = loss
-        self.feature_dim = feature_dim
-        self.conv1 = ConvLayer(
-            3, channels[0], 7, stride=2, padding=3, IN=conv1_IN
-        )
-        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-        self.conv2 = self._make_layer(
-            blocks[0], layers[0], channels[0], channels[1]
-        )
-        self.pool2 = nn.Sequential(
-            Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2)
-        )
-        self.conv3 = self._make_layer(
-            blocks[1], layers[1], channels[1], channels[2]
-        )
-        self.pool3 = nn.Sequential(
-            Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2)
-        )
-        self.conv4 = self._make_layer(
-            blocks[2], layers[2], channels[2], channels[3]
-        )
-        self.conv5 = Conv1x1(channels[3], channels[3])
-        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-        self.fc = self._construct_fc_layer(
-            self.feature_dim, channels[3], dropout_p=None
-        )
-        self.classifier = nn.Linear(self.feature_dim, num_classes)
-        self._init_params()
-
-    def _make_layer(self, blocks, layer, in_channels, out_channels):
-        layers = []
-        layers += [blocks[0](in_channels, out_channels)]
-        for i in range(1, len(blocks)):
-            layers += [blocks[i](out_channels, out_channels)]
-        return nn.Sequential(*layers)
-
-    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-        if fc_dims is None or fc_dims < 0:
-            self.feature_dim = input_dim
-            return None
-        if isinstance(fc_dims, int):
-            fc_dims = [fc_dims]
-        layers = []
-        for dim in fc_dims:
-            layers.append(nn.Linear(input_dim, dim))
-            layers.append(nn.BatchNorm1d(dim))
-            layers.append(nn.ReLU())
-            if dropout_p is not None:
-                layers.append(nn.Dropout(p=dropout_p))
-            input_dim = dim
-        self.feature_dim = fc_dims[-1]
-        return nn.Sequential(*layers)
-
-    def _init_params(self):
-        for m in self.modules():
-            if isinstance(m, nn.Conv2d):
-                nn.init.kaiming_normal_(
-                    m.weight, mode="fan_out", nonlinearity="relu"
-                )
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-            elif (
-                isinstance(m, nn.BatchNorm2d)
-                or isinstance(m, nn.BatchNorm1d)
-                or isinstance(m, nn.InstanceNorm2d)
-            ):
-                nn.init.constant_(m.weight, 1)
-                nn.init.constant_(m.bias, 0)
-            elif isinstance(m, nn.Linear):
-                nn.init.normal_(m.weight, 0, 0.01)
-                if m.bias is not None:
-                    nn.init.constant_(m.bias, 0)
-
-    def featuremaps(self, x):
-        x = self.conv1(x)
-        x = self.maxpool(x)
-        x = self.conv2(x)
-        x = self.pool2(x)
-        x = self.conv3(x)
-        x = self.pool3(x)
-        x = self.conv4(x)
-        x = self.conv5(x)
-        return x
-
-    def forward(self, x, return_featuremaps=False):
-        x = self.featuremaps(x)
-        if return_featuremaps:
-            return x
-        v = self.global_avgpool(x)
-        v = v.view(v.size(0), -1)
-        if self.fc is not None:
-            v = self.fc(v)
-        if not self.training:
-            return v
-        y = self.classifier(v)
-        if self.loss == "softmax":
-            return y
-        elif self.loss == "triplet":
-            return y, v
-        else:
-            raise KeyError(f"Unsupported loss: {self.loss}")
-
+	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", conv1_IN=False, **kwargs):
+		super(OSNet, self).__init__()
+		num_blocks = len(blocks)
+		assert num_blocks == len(layers)
+		assert num_blocks == len(channels) - 1
+		self.loss = loss
+		self.feature_dim = feature_dim
+		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=conv1_IN)
+		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1])
+		self.pool2 = nn.Sequential(Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2))
+		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2])
+		self.pool3 = nn.Sequential(Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2))
+		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3])
+		self.conv5 = Conv1x1(channels[3], channels[3])
+		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+		self._init_params()
+	def _make_layer(self, blocks, layer, in_channels, out_channels):
+		layers = []
+		layers += [blocks[0](in_channels, out_channels)]
+		for i in range(1, len(blocks)):
+			layers += [blocks[i](out_channels, out_channels)]
+		return nn.Sequential(*layers)
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+		if fc_dims is None or fc_dims < 0:
+			self.feature_dim = input_dim
+			return None
+		if isinstance(fc_dims, int):
+			fc_dims = [fc_dims]
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU())
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+		self.feature_dim = fc_dims[-1]
+		return nn.Sequential(*layers)
+	def _init_params(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.InstanceNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+	def featuremaps(self, x):
+		x = self.conv1(x)
+		x = self.maxpool(x)
+		x = self.conv2(x)
+		x = self.pool2(x)
+		x = self.conv3(x)
+		x = self.pool3(x)
+		x = self.conv4(x)
+		x = self.conv5(x)
+		return x
+	def forward(self, x, return_featuremaps=False):
+		x = self.featuremaps(x)
+		if return_featuremaps:
+			return x
+		v = self.global_avgpool(x)
+		v = v.view(v.size(0), -1)
+		if self.fc is not None:
+			v = self.fc(v)
+		if not self.training:
+			return v
+		y = self.classifier(v)
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
 
 def init_pretrained_weights(model, key=""):
-    import errno
-    import os
-    from collections import OrderedDict
-
-    import gdown
-
-    def _get_torch_home():
-        ENV_TORCH_HOME = "TORCH_HOME"
-        ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
-        DEFAULT_CACHE_DIR = "~/.cache"
-        torch_home = os.path.expanduser(
-            os.getenv(
-                ENV_TORCH_HOME,
-                os.path.join(
-                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"
-                ),
-            )
-        )
-        return torch_home
-
-    torch_home = _get_torch_home()
-    model_dir = os.path.join(torch_home, "checkpoints")
-    try:
-        os.makedirs(model_dir)
-    except OSError as e:
-        if e.errno == errno.EEXIST:
-            pass
-        else:
-            raise
-    filename = key + "_imagenet.pth"
-    cached_file = os.path.join(model_dir, filename)
-    if not os.path.exists(cached_file):
-        gdown.download(pretrained_urls[key], cached_file, quiet=False)
-    state_dict = torch.load(cached_file)
-    model_dict = model.state_dict()
-    new_state_dict = OrderedDict()
-    matched_layers, discarded_layers = [], []
-    for k, v in state_dict.items():
-        if k.startswith("module."):
-            k = k[7:]
-        if k in model_dict and model_dict[k].size() == v.size():
-            new_state_dict[k] = v
-            matched_layers.append(k)
-        else:
-            discarded_layers.append(k)
-    model_dict.update(new_state_dict)
-    model.load_state_dict(model_dict)
-    if len(matched_layers) == 0:
-        warnings.warn(
-            f'The pretrained weights from "{cached_file}" cannot be loaded, '
-            "please check the key names manually "
-            "(** ignored and continue **)"
-        )
-    else:
-        print(
-            f'Successfully loaded imagenet pretrained weights from "{cached_file}"'
-        )
-        if len(discarded_layers) > 0:
-            print(
-                "** The following layers are discarded "
-                f"due to unmatched keys or layer size: {discarded_layers}"
-            )
-
-
-def osnet_ain_x1_0(
-    num_classes=1000, pretrained=True, loss="softmax", **kwargs
-):
-    model = OSNet(
-        num_classes,
-        blocks=[
-            [OSBlockINin, OSBlockINin],
-            [OSBlock, OSBlockINin],
-            [OSBlockINin, OSBlock],
-        ],
-        layers=[2, 2, 2],
-        channels=[64, 256, 384, 512],
-        loss=loss,
-        conv1_IN=True,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_ain_x1_0")
-    return model
-
-
-def osnet_ain_x0_75(
-    num_classes=1000, pretrained=True, loss="softmax", **kwargs
-):
-    model = OSNet(
-        num_classes,
-        blocks=[
-            [OSBlockINin, OSBlockINin],
-            [OSBlock, OSBlockINin],
-            [OSBlockINin, OSBlock],
-        ],
-        layers=[2, 2, 2],
-        channels=[48, 192, 288, 384],
-        loss=loss,
-        conv1_IN=True,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_ain_x0_75")
-    return model
-
-
-def osnet_ain_x0_5(
-    num_classes=1000, pretrained=True, loss="softmax", **kwargs
-):
-    model = OSNet(
-        num_classes,
-        blocks=[
-            [OSBlockINin, OSBlockINin],
-            [OSBlock, OSBlockINin],
-            [OSBlockINin, OSBlock],
-        ],
-        layers=[2, 2, 2],
-        channels=[32, 128, 192, 256],
-        loss=loss,
-        conv1_IN=True,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_ain_x0_5")
-    return model
-
-
-def osnet_ain_x0_25(
-    num_classes=1000, pretrained=True, loss="softmax", **kwargs
-):
-    model = OSNet(
-        num_classes,
-        blocks=[
-            [OSBlockINin, OSBlockINin],
-            [OSBlock, OSBlockINin],
-            [OSBlockINin, OSBlock],
-        ],
-        layers=[2, 2, 2],
-        channels=[16, 64, 96, 128],
-        loss=loss,
-        conv1_IN=True,
-        **kwargs,
-    )
-    if pretrained:
-        init_pretrained_weights(model, key="osnet_ain_x0_25")
-    return model
+	import errno
+	import os
+	from collections import OrderedDict
+	import gdown
+	def _get_torch_home():
+		ENV_TORCH_HOME = "TORCH_HOME"
+		ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
+		DEFAULT_CACHE_DIR = "~/.cache"
+		torch_home = os.path.expanduser(
+			os.getenv(
+				ENV_TORCH_HOME,
+				os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"),
+			)
+		)
+		return torch_home
+	torch_home = _get_torch_home()
+	model_dir = os.path.join(torch_home, "checkpoints")
+	try:
+		os.makedirs(model_dir)
+	except OSError as e:
+		if e.errno == errno.EEXIST:
+			pass
+		else:
+			raise
+	filename = key + "_imagenet.pth"
+	cached_file = os.path.join(model_dir, filename)
+	if not os.path.exists(cached_file):
+		gdown.download(pretrained_urls[key], cached_file, quiet=False)
+	state_dict = torch.load(cached_file)
+	model_dict = model.state_dict()
+	new_state_dict = OrderedDict()
+	matched_layers, discarded_layers = [], []
+	for k, v in state_dict.items():
+		if k.startswith("module."):
+			k = k[7:]
+		if k in model_dict and model_dict[k].size() == v.size():
+			new_state_dict[k] = v
+			matched_layers.append(k)
+		else:
+			discarded_layers.append(k)
+	model_dict.update(new_state_dict)
+	model.load_state_dict(model_dict)
+	if len(matched_layers) == 0:
+		warnings.warn(
+			'The pretrained weights from "{}" cannot be loaded, '
+			"please check the key names manually "
+			"(** ignored and continue **)".format(cached_file)
+		)
+	else:
+		print(
+			'Successfully loaded imagenet pretrained weights from "{}"'.format(
+				cached_file
+			)
+		)
+		if len(discarded_layers) > 0:
+			print(
+				"** The following layers are discarded "
+				"due to unmatched keys or layer size: {}".format(discarded_layers)
+			)
+
+def osnet_ain_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, conv1_IN=True, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_ain_x1_0")
+	return model
+
+def osnet_ain_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[48, 192, 288, 384], loss=loss, conv1_IN=True, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_ain_x0_75")
+	return model
+
+def osnet_ain_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[32, 128, 192, 256], loss=loss, conv1_IN=True, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_ain_x0_5")
+	return model
+
+def osnet_ain_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
+	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[16, 64, 96, 128], loss=loss, conv1_IN=True, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, key="osnet_ain_x0_25")
+	return model
diff --git a/ethology/reid/backbones/resnet.py b/ethology/reid/backbones/resnet.py
index 80eda98e..7cf28df1 100644
--- a/ethology/reid/backbones/resnet.py
+++ b/ethology/reid/backbones/resnet.py
@@ -1,11 +1,19 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
+<<<<<<< HEAD
 """Code source: https://github.com/pytorch/vision"""
+=======
+"""
+Code source: https://github.com/pytorch/vision
+"""
+from __future__ import absolute_import, division
+>>>>>>> a4dd694 (style(reid): fix ruff errors in hacnn.py and mlfn.py\n\n- Add missing docstrings\n- Use super() instead of super(Class, self)\n- Avoid mutable default arguments\n- Fix long lines and other ruff issues)
 
 import torch.utils.model_zoo as model_zoo
 from torch import nn
 
 __all__ = [
+<<<<<<< HEAD
     "resnet18",
     "resnet34",
     "resnet50",
@@ -24,10 +32,31 @@
     "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
     "resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
     "resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
+=======
+	"resnet18",
+	"resnet34",
+	"resnet50",
+	"resnet101",
+	"resnet152",
+	"resnext50_32x4d",
+	"resnext101_32x8d",
+	"resnet50_fc512",
+]
+
+model_urls = {
+	"resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
+	"resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
+	"resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
+	"resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
+	"resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
+	"resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
+	"resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
+>>>>>>> a4dd694 (style(reid): fix ruff errors in hacnn.py and mlfn.py\n\n- Add missing docstrings\n- Use super() instead of super(Class, self)\n- Avoid mutable default arguments\n- Fix long lines and other ruff issues)
 }
 
 # ...existing code for conv3x3, conv1x1, BasicBlock, Bottleneck, ResNet, init_pretrained_weights, and instantiation functions...
 
+<<<<<<< HEAD
 
 def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
     return nn.Conv2d(
@@ -459,3 +488,245 @@ def resnet50_fc512(num_classes, loss="softmax", pretrained=True, **kwargs):
     if pretrained:
         init_pretrained_weights(model, model_urls["resnet50"])
     return model
+=======
+def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
+	return nn.Conv2d(
+		in_planes,
+		out_planes,
+		kernel_size=3,
+		stride=stride,
+		padding=dilation,
+		groups=groups,
+		bias=False,
+		dilation=dilation,
+	)
+
+def conv1x1(in_planes, out_planes, stride=1):
+	return nn.Conv2d(in_planes, out_planes, kernel_size=1, stride=stride, bias=False)
+
+class BasicBlock(nn.Module):
+	expansion = 1
+	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
+		super(BasicBlock, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		if groups != 1 or base_width != 64:
+			raise ValueError("BasicBlock only supports groups=1 and base_width=64")
+		if dilation > 1:
+			raise NotImplementedError("Dilation > 1 not supported in BasicBlock")
+		self.conv1 = conv3x3(inplanes, planes, stride)
+		self.bn1 = norm_layer(planes)
+		self.relu = nn.ReLU(inplace=True)
+		self.conv2 = conv3x3(planes, planes)
+		self.bn2 = norm_layer(planes)
+		self.downsample = downsample
+		self.stride = stride
+	def forward(self, x):
+		identity = x
+		out = self.conv1(x)
+		out = self.bn1(out)
+		out = self.relu(out)
+		out = self.conv2(out)
+		out = self.bn2(out)
+		if self.downsample is not None:
+			identity = self.downsample(x)
+		out += identity
+		out = self.relu(out)
+		return out
+
+class Bottleneck(nn.Module):
+	expansion = 4
+	def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1, base_width=64, dilation=1, norm_layer=None):
+		super(Bottleneck, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		width = int(planes * (base_width / 64.0)) * groups
+		self.conv1 = conv1x1(inplanes, width)
+		self.bn1 = norm_layer(width)
+		self.conv2 = conv3x3(width, width, stride, groups, dilation)
+		self.bn2 = norm_layer(width)
+		self.conv3 = conv1x1(width, planes * self.expansion)
+		self.bn3 = norm_layer(planes * self.expansion)
+		self.relu = nn.ReLU(inplace=True)
+		self.downsample = downsample
+		self.stride = stride
+	def forward(self, x):
+		identity = x
+		out = self.conv1(x)
+		out = self.bn1(out)
+		out = self.relu(out)
+		out = self.conv2(out)
+		out = self.bn2(out)
+		out = self.relu(out)
+		out = self.conv3(out)
+		out = self.bn3(out)
+		if self.downsample is not None:
+			identity = self.downsample(x)
+		out += identity
+		out = self.relu(out)
+		return out
+
+class ResNet(nn.Module):
+	def __init__(self, num_classes, loss, block, layers, zero_init_residual=False, groups=1, width_per_group=64, replace_stride_with_dilation=None, norm_layer=None, last_stride=2, fc_dims=None, dropout_p=None, **kwargs):
+		super(ResNet, self).__init__()
+		if norm_layer is None:
+			norm_layer = nn.BatchNorm2d
+		self._norm_layer = norm_layer
+		self.loss = loss
+		self.feature_dim = 512 * block.expansion
+		self.inplanes = 64
+		self.dilation = 1
+		if replace_stride_with_dilation is None:
+			replace_stride_with_dilation = [False, False, False]
+		if len(replace_stride_with_dilation) != 3:
+			raise ValueError("replace_stride_with_dilation should be None or a 3-element tuple, got {}".format(replace_stride_with_dilation))
+		self.groups = groups
+		self.base_width = width_per_group
+		self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3, bias=False)
+		self.bn1 = norm_layer(self.inplanes)
+		self.relu = nn.ReLU(inplace=True)
+		self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
+		self.layer1 = self._make_layer(block, 64, layers[0])
+		self.layer2 = self._make_layer(block, 128, layers[1], stride=2, dilate=replace_stride_with_dilation[0])
+		self.layer3 = self._make_layer(block, 256, layers[2], stride=2, dilate=replace_stride_with_dilation[1])
+		self.layer4 = self._make_layer(block, 512, layers[3], stride=last_stride, dilate=replace_stride_with_dilation[2])
+		self.global_avgpool = nn.AdaptiveAvgPool2d((1, 1))
+		self.fc = self._construct_fc_layer(fc_dims, 512 * block.expansion, dropout_p)
+		self.classifier = nn.Linear(self.feature_dim, num_classes)
+		self._init_params()
+		if zero_init_residual:
+			for m in self.modules():
+				if isinstance(m, Bottleneck):
+					nn.init.constant_(m.bn3.weight, 0)
+				elif isinstance(m, BasicBlock):
+					nn.init.constant_(m.bn2.weight, 0)
+	def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
+		norm_layer = self._norm_layer
+		downsample = None
+		previous_dilation = self.dilation
+		if dilate:
+			self.dilation *= stride
+			stride = 1
+		if stride != 1 or self.inplanes != planes * block.expansion:
+			downsample = nn.Sequential(
+				conv1x1(self.inplanes, planes * block.expansion, stride),
+				norm_layer(planes * block.expansion),
+			)
+		layers = []
+		layers.append(block(self.inplanes, planes, stride, downsample, self.groups, self.base_width, previous_dilation, norm_layer))
+		self.inplanes = planes * block.expansion
+		for _ in range(1, blocks):
+			layers.append(block(self.inplanes, planes, groups=self.groups, base_width=self.base_width, dilation=self.dilation, norm_layer=norm_layer))
+		return nn.Sequential(*layers)
+	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+		if fc_dims is None:
+			self.feature_dim = input_dim
+			return None
+		assert isinstance(fc_dims, (list, tuple)), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
+		layers = []
+		for dim in fc_dims:
+			layers.append(nn.Linear(input_dim, dim))
+			layers.append(nn.BatchNorm1d(dim))
+			layers.append(nn.ReLU(inplace=True))
+			if dropout_p is not None:
+				layers.append(nn.Dropout(p=dropout_p))
+			input_dim = dim
+		self.feature_dim = fc_dims[-1]
+		return nn.Sequential(*layers)
+	def _init_params(self):
+		for m in self.modules():
+			if isinstance(m, nn.Conv2d):
+				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm2d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.BatchNorm1d):
+				nn.init.constant_(m.weight, 1)
+				nn.init.constant_(m.bias, 0)
+			elif isinstance(m, nn.Linear):
+				nn.init.normal_(m.weight, 0, 0.01)
+				if m.bias is not None:
+					nn.init.constant_(m.bias, 0)
+	def featuremaps(self, x):
+		x = self.conv1(x)
+		x = self.bn1(x)
+		x = self.relu(x)
+		x = self.maxpool(x)
+		x = self.layer1(x)
+		x = self.layer2(x)
+		x = self.layer3(x)
+		x = self.layer4(x)
+		return x
+	def forward(self, x):
+		f = self.featuremaps(x)
+		v = self.global_avgpool(f)
+		v = v.view(v.size(0), -1)
+		if self.fc is not None:
+			v = self.fc(v)
+		if not self.training:
+			return v
+		y = self.classifier(v)
+		if self.loss == "softmax":
+			return y
+		elif self.loss == "triplet":
+			return y, v
+		else:
+			raise KeyError("Unsupported loss: {}".format(self.loss))
+
+def init_pretrained_weights(model, model_url):
+	pretrain_dict = model_zoo.load_url(model_url)
+	model_dict = model.state_dict()
+	pretrain_dict = {k: v for k, v in pretrain_dict.items() if k in model_dict and model_dict[k].size() == v.size()}
+	model_dict.update(pretrain_dict)
+	model.load_state_dict(model_dict)
+
+def resnet18(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=BasicBlock, layers=[2, 2, 2, 2], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet18"])
+	return model
+
+def resnet34(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=BasicBlock, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet34"])
+	return model
+
+def resnet50(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet50"])
+	return model
+
+def resnet101(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 23, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet101"])
+	return model
+
+def resnet152(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 8, 36, 3], last_stride=2, fc_dims=None, dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet152"])
+	return model
+
+def resnext50_32x4d(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=2, fc_dims=None, dropout_p=None, groups=32, width_per_group=4, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnext50_32x4d"])
+	return model
+
+def resnext101_32x8d(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 23, 3], last_stride=2, fc_dims=None, dropout_p=None, groups=32, width_per_group=8, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnext101_32x8d"])
+	return model
+
+def resnet50_fc512(num_classes, loss="softmax", pretrained=True, **kwargs):
+	model = ResNet(num_classes=num_classes, loss=loss, block=Bottleneck, layers=[3, 4, 6, 3], last_stride=1, fc_dims=[512], dropout_p=None, **kwargs)
+	if pretrained:
+		init_pretrained_weights(model, model_urls["resnet50"])
+	return model
+>>>>>>> a4dd694 (style(reid): fix ruff errors in hacnn.py and mlfn.py\n\n- Add missing docstrings\n- Use super() instead of super(Class, self)\n- Avoid mutable default arguments\n- Fix long lines and other ruff issues)
diff --git a/ethology/reid/backends/base_backend.py b/ethology/reid/backends/base_backend.py
index 0edb2826..688ec43a 100644
--- a/ethology/reid/backends/base_backend.py
+++ b/ethology/reid/backends/base_backend.py
@@ -1,19 +1,18 @@
+
+import os
 from abc import abstractmethod
 from pathlib import Path
-
 import cv2
 import gdown
 import numpy as np
 import torch
 from filelock import SoftFileLock
-
 from ethology.reid.core.registry import ReIDModelRegistry
-
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 # from ethology.utils.checks import RequirementsChecker  # If needed, implement or set RequirementsChecker
 
-
 class BaseModelBackend:
+
     def __init__(self, weights, device, half):
         self.weights = weights[0] if isinstance(weights, list) else weights
         if isinstance(self.weights, str):
@@ -23,7 +22,7 @@ def __init__(self, weights, device, half):
         self.half = half
         self.model = None
         # Support both string and torch.device for device
-        if hasattr(self.device, "type"):
+        if hasattr(self.device, 'type'):
             self.cuda = torch.cuda.is_available() and self.device.type != "cpu"
         else:
             self.cuda = torch.cuda.is_available() and self.device != "cpu"
@@ -42,19 +41,11 @@ def __init__(self, weights, device, half):
 
         self.load_model(self.weights)
 
-        self.mean_array = torch.tensor(
-            [0.485, 0.456, 0.406], device=self.device
-        ).view(1, 3, 1, 1)
-        self.std_array = torch.tensor(
-            [0.229, 0.224, 0.225], device=self.device
-        ).view(1, 3, 1, 1)
+        self.mean_array = torch.tensor([0.485, 0.456, 0.406], device=self.device).view(1, 3, 1, 1)
+        self.std_array = torch.tensor([0.229, 0.224, 0.225], device=self.device).view(1, 3, 1, 1)
         if "clip" in self.model_name:
-            self.mean_array = torch.tensor(
-                [0.5, 0.5, 0.5], device=self.device
-            ).view(1, 3, 1, 1)
-            self.std_array = torch.tensor(
-                [0.5, 0.5, 0.5], device=self.device
-            ).view(1, 3, 1, 1)
+            self.mean_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
+            self.std_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
 
         if "vehicleid" in self.weights.name or "veri" in self.weights.name:
             input_shape = (256, 256)
@@ -66,6 +57,7 @@ def __init__(self, weights, device, half):
             input_shape = (256, 128)
         self.input_shape = input_shape
 
+
     def get_crops(self, xyxys, img):
         h, w = img.shape[:2]
         interpolation_method = cv2.INTER_LINEAR
@@ -93,6 +85,7 @@ def get_crops(self, xyxys, img):
         crops = (crops - self.mean_array) / self.std_array
         return crops
 
+
     @torch.no_grad()
     def get_features(self, xyxys, img):
         if xyxys.size != 0:
@@ -105,6 +98,7 @@ def get_features(self, xyxys, img):
         features = features / np.linalg.norm(features, axis=-1, keepdims=True)
         return features
 
+
     def warmup(self, imgsz=[(256, 128, 3)]):
         if self.device.type != "cpu":
             im = np.random.randint(0, 255, *imgsz, dtype=np.uint8)
@@ -114,9 +108,11 @@ def warmup(self, imgsz=[(256, 128, 3)]):
             crops = self.inference_preprocess(crops)
             self.forward(crops)
 
+
     def to_numpy(self, x):
         return x.cpu().numpy() if isinstance(x, torch.Tensor) else x
 
+
     def inference_preprocess(self, x):
         if self.half:
             if isinstance(x, torch.Tensor):
@@ -125,34 +121,32 @@ def inference_preprocess(self, x):
             elif isinstance(x, np.ndarray):
                 if x.dtype != np.float16:
                     x = x.astype(np.float16)
-        if hasattr(self, "nhwc") and self.nhwc:
+        if hasattr(self, 'nhwc') and self.nhwc:
             if isinstance(x, torch.Tensor):
                 x = x.permute(0, 2, 3, 1)
             elif isinstance(x, np.ndarray):
                 x = np.transpose(x, (0, 2, 3, 1))
         return x
 
+
     def inference_postprocess(self, features):
         if isinstance(features, (list, tuple)):
             return (
-                self.to_numpy(features[0])
-                if len(features) == 1
-                else [self.to_numpy(x) for x in features]
+                self.to_numpy(features[0]) if len(features) == 1 else [self.to_numpy(x) for x in features]
             )
         else:
             return self.to_numpy(features)
 
+
     @abstractmethod
     def forward(self, im_batch):
-        raise NotImplementedError(
-            "This method should be implemented by subclasses."
-        )
+        raise NotImplementedError("This method should be implemented by subclasses.")
+
 
     @abstractmethod
     def load_model(self, w):
-        raise NotImplementedError(
-            "This method should be implemented by subclasses."
-        )
+        raise NotImplementedError("This method should be implemented by subclasses.")
+
 
     def download_model(self, w):
         if isinstance(w, str):
diff --git a/ethology/reid/backends/onnx_backend.py b/ethology/reid/backends/onnx_backend.py
index 41aefb7f..c7c93017 100644
--- a/ethology/reid/backends/onnx_backend.py
+++ b/ethology/reid/backends/onnx_backend.py
@@ -1,34 +1,31 @@
-from ethology.reid.backends.base_backend import BaseModelBackend
 
+from ethology.reid.backends.base_backend import BaseModelBackend
 
 class ONNXBackend(BaseModelBackend):
-    def __init__(self, weights, device, half):
-        super().__init__(weights, device, half)
-        self.nhwc = False
-        self.half = half
-
-    def load_model(self, w):
-        # ONNXRuntime will attempt to use the first provider, and if it fails or is not
-        # available for some reason, it will fall back to the next provider in the list
-        if self.device.type == "mps":
-            # self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
-            providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
-        elif self.device.type == "cuda":
-            # self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
-            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-        else:
-            # self.checker.check_packages(("onnxruntime==1.18.1",))
-            providers = ["CPUExecutionProvider"]
-        import onnxruntime
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
 
-        self.session = onnxruntime.InferenceSession(
-            str(w), providers=providers
-        )
+	def load_model(self, w):
+		# ONNXRuntime will attempt to use the first provider, and if it fails or is not
+		# available for some reason, it will fall back to the next provider in the list
+		if self.device.type == "mps":
+			# self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
+			providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
+		elif self.device.type == "cuda":
+			# self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
+			providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+		else:
+			# self.checker.check_packages(("onnxruntime==1.18.1",))
+			providers = ["CPUExecutionProvider"]
+		import onnxruntime
+		self.session = onnxruntime.InferenceSession(str(w), providers=providers)
 
-    def forward(self, im_batch):
-        im_batch = im_batch.cpu().numpy()
-        features = self.session.run(
-            [self.session.get_outputs()[0].name],
-            {self.session.get_inputs()[0].name: im_batch},
-        )[0]
-        return features
+	def forward(self, im_batch):
+		im_batch = im_batch.cpu().numpy()
+		features = self.session.run(
+			[self.session.get_outputs()[0].name],
+			{self.session.get_inputs()[0].name: im_batch},
+		)[0]
+		return features
diff --git a/ethology/reid/backends/openvino_backend.py b/ethology/reid/backends/openvino_backend.py
index 0c56a06e..f06392bf 100644
--- a/ethology/reid/backends/openvino_backend.py
+++ b/ethology/reid/backends/openvino_backend.py
@@ -1,49 +1,48 @@
 from pathlib import Path
 
 from ethology.reid.backends.base_backend import BaseModelBackend
-
 # Note: LOGGER can be replaced with print or a local logger if needed
 
-
 class OpenVinoBackend(BaseModelBackend):
-    def __init__(self, weights, device, half):
-        super().__init__(weights, device, half)
-        self.nhwc = False
-        self.half = half
-
-    def load_model(self, w):
-        # self.checker.check_packages(("openvino>=2025.2.0",))
-
-        print(f"Loading {w} for OpenVINO inference...")
-        try:
-            # requires openvino-dev: https://pypi.org/project/openvino-dev/
-            from openvino import Core, Layout
-        except ImportError:
-            print(
-                f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
-                "requires openvino pip package to be installed!\n"
-                "$ pip install openvino>=2025.2.0\n"
-            )
-            raise
-        ie = Core()
-        w = Path(w)
-        print(w)
-        if w.suffix == ".bin":
-            w = w.with_suffix(".xml")
-
-        if not w.is_file():  # if not *.xml
-            w = next(
-                Path(w).glob("*.xml")
-            )  # get *.xml file from *_openvino_model dir
-        network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
-        if network.get_parameters()[0].get_layout().empty:
-            network.get_parameters()[0].set_layout(Layout("NCWH"))
-        self.executable_network = ie.compile_model(
-            network, device_name="CPU"
-        )  # device_name="MYRIAD" for Intel NCS2
-        self.output_layer = next(iter(self.executable_network.outputs))
 
-    def forward(self, im_batch):
-        im_batch = im_batch.cpu().numpy()  # FP32
-        features = self.executable_network([im_batch])[self.output_layer]
-        return features
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		# self.checker.check_packages(("openvino>=2025.2.0",))
+
+		print(f"Loading {w} for OpenVINO inference...")
+		try:
+			# requires openvino-dev: https://pypi.org/project/openvino-dev/
+			from openvino import Core, Layout
+		except ImportError:
+			print(
+				f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
+				"requires openvino pip package to be installed!\n"
+				"$ pip install openvino>=2025.2.0\n"
+			)
+			raise
+		ie = Core()
+		w = Path(w)
+		print(w)
+		if w.suffix == '.bin':
+			w = w.with_suffix('.xml')
+
+		if not w.is_file():  # if not *.xml
+			w = next(
+				Path(w).glob("*.xml")
+			)  # get *.xml file from *_openvino_model dir
+		network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
+		if network.get_parameters()[0].get_layout().empty:
+			network.get_parameters()[0].set_layout(Layout("NCWH"))
+		self.executable_network = ie.compile_model(
+			network, device_name="CPU"
+		)  # device_name="MYRIAD" for Intel NCS2
+		self.output_layer = next(iter(self.executable_network.outputs))
+
+	def forward(self, im_batch):
+		im_batch = im_batch.cpu().numpy()  # FP32
+		features = self.executable_network([im_batch])[self.output_layer]
+		return features
diff --git a/ethology/reid/backends/pytorch_backend.py b/ethology/reid/backends/pytorch_backend.py
index 2e859cc8..d3dbfa06 100644
--- a/ethology/reid/backends/pytorch_backend.py
+++ b/ethology/reid/backends/pytorch_backend.py
@@ -1,20 +1,20 @@
 from ethology.reid.backends.base_backend import BaseModelBackend
 from ethology.reid.core.registry import ReIDModelRegistry
 
-
 class PyTorchBackend(BaseModelBackend):
-    def __init__(self, weights, device, half):
-        super().__init__(weights, device, half)
-        self.nhwc = False
-        self.half = half
 
-    def load_model(self, w):
-        # Load a PyTorch model
-        if w and w.is_file():
-            ReIDModelRegistry.load_pretrained_weights(self.model, w)
-        self.model.to(self.device).eval()
-        self.model.half() if self.half else self.model.float()
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		# Load a PyTorch model
+		if w and w.is_file():
+			ReIDModelRegistry.load_pretrained_weights(self.model, w)
+		self.model.to(self.device).eval()
+		self.model.half() if self.half else self.model.float()
 
-    def forward(self, im_batch):
-        features = self.model(im_batch)
-        return features
+	def forward(self, im_batch):
+		features = self.model(im_batch)
+		return features
diff --git a/ethology/reid/backends/tensorrt_backend.py b/ethology/reid/backends/tensorrt_backend.py
index 4f6e95b0..8dd7d7ee 100644
--- a/ethology/reid/backends/tensorrt_backend.py
+++ b/ethology/reid/backends/tensorrt_backend.py
@@ -1,400 +1,310 @@
-# Note: LOGGER can be replaced with print or a local logger if needed
-import os
 from collections import OrderedDict, namedtuple
 
 import numpy as np
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+# Note: LOGGER can be replaced with print or a local logger if needed
+
+import os
+import sys
+import torch
+import numpy as np
+from collections import namedtuple, OrderedDict
+
+
 
 Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
 
 
 class TensorRTBackend(BaseModelBackend):
-    def __init__(self, engine_path, device=None):
-        import hashlib
-
-        import requests
-
-        self.device = device or (
-            torch.device("cuda")
-            if torch.cuda.is_available()
-            else torch.device("cpu")
-        )
-        self.fp16 = False
-        self.model_ = None
-        self.context = None
-        self.bindings = None
-        self.binding_addrs = None
-        self.is_trt10 = False
-        # Download engine if engine_path is a URL
-        if engine_path.startswith("http://") or engine_path.startswith(
-            "https://"
-        ):
-            # Use a hash of the URL for filename
-            engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
-            filename = f"trt_engine_{engine_hash}.engine"
-            cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
-            os.makedirs(cache_dir, exist_ok=True)
-            cached_file = os.path.join(cache_dir, filename)
-            if not os.path.exists(cached_file):
-                print(
-                    f"[TensorRT] Downloading engine from {engine_path} to {cached_file}"
-                )
-                with requests.get(engine_path, stream=True) as r:
-                    r.raise_for_status()
-                    with open(cached_file, "wb") as f:
-                        for chunk in r.iter_content(chunk_size=8192):
-                            f.write(chunk)
-            else:
-                print(f"[TensorRT] Using cached engine at {cached_file}")
-            self.engine_path = cached_file
-        else:
-            self.engine_path = engine_path
-        self.load_model(self.engine_path)
-
-    def load_model(self, w):
-        print(f"Loading {w} for TensorRT inference...")
-        try:
-            import pycuda.autoinit  # noqa: F401
-            import pycuda.driver as cuda
-            import tensorrt as trt
-        except ImportError:
-            raise ImportError(
-                "TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH."
-            )
-
-        if self.device.type == "cpu":
-            if torch.cuda.is_available():
-                self.device = torch.device("cuda:0")
-            else:
-                raise ValueError(
-                    "CUDA device not available for TensorRT inference."
-                )
-
-        Binding = namedtuple(
-            "Binding", ("name", "dtype", "shape", "data", "ptr")
-        )
-        logger = trt.Logger(trt.Logger.INFO)
-
-        # Deserialize the engine
-        with open(w, "rb") as f, trt.Runtime(logger) as runtime:
-            self.model_ = runtime.deserialize_cuda_engine(f.read())
-
-        # Execution context
-        self.context = self.model_.create_execution_context()
-        self.bindings = OrderedDict()
-
-        self.is_trt10 = not hasattr(self.model_, "num_bindings")
-        num = (
-            range(self.model_.num_io_tensors)
-            if self.is_trt10
-            else range(self.model_.num_bindings)
-        )
-
-        # Parse bindings
-        for index in num:
-            if self.is_trt10:
-                name = self.model_.get_tensor_name(index)
-                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-                is_input = (
-                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-                )
-                if is_input and -1 in tuple(
-                    self.model_.get_tensor_shape(name)
-                ):
-                    self.context.set_input_shape(
-                        name,
-                        tuple(
-                            self.model_.get_tensor_profile_shape(name, 0)[1]
-                        ),
-                    )
-                if is_input and dtype == np.float16:
-                    self.fp16 = True
-
-                shape = tuple(self.context.get_tensor_shape(name))
-
-            else:
-                name = self.model_.get_binding_name(index)
-                dtype = trt.nptype(self.model_.get_binding_dtype(index))
-                is_input = self.model_.binding_is_input(index)
-
-                # Handle dynamic shapes
-                if is_input and -1 in self.model_.get_binding_shape(index):
-                    profile_index = 0
-                    min_shape, opt_shape, max_shape = (
-                        self.model_.get_profile_shape(profile_index, index)
-                    )
-                    self.context.set_binding_shape(index, opt_shape)
-
-                if is_input and dtype == np.float16:
-                    self.fp16 = True
-
-                shape = tuple(self.context.get_binding_shape(index))
-            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
-                self.device
-            )
-            self.bindings[name] = Binding(
-                name, dtype, shape, data, int(data.data_ptr())
-            )
-
-        self.binding_addrs = OrderedDict(
-            (n, d.ptr) for n, d in self.bindings.items()
-        )
-
-        # Execution context
-        self.context = self.model_.create_execution_context()
-        self.bindings = OrderedDict()
-
-        self.is_trt10 = not hasattr(self.model_, "num_bindings")
-        num = (
-            range(self.model_.num_io_tensors)
-            if self.is_trt10
-            else range(self.model_.num_bindings)
-        )
-
-        # Parse bindings
-        for index in num:
-            if self.is_trt10:
-                name = self.model_.get_tensor_name(index)
-                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-                is_input = (
-                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-                )
-                if is_input and -1 in tuple(
-                    self.model_.get_tensor_shape(name)
-                ):
-                    self.context.set_input_shape(
-                        name,
-                        tuple(
-                            self.model_.get_tensor_profile_shape(name, 0)[1]
-                        ),
-                    )
-                if is_input and dtype == np.float16:
-                    self.fp16 = True
-
-                shape = tuple(self.context.get_tensor_shape(name))
-
-            else:
-                name = self.model_.get_binding_name(index)
-                dtype = trt.nptype(self.model_.get_binding_dtype(index))
-                is_input = self.model_.binding_is_input(index)
-
-                # Handle dynamic shapes
-                if is_input and -1 in self.model_.get_binding_shape(index):
-                    profile_index = 0
-                    min_shape, opt_shape, max_shape = (
-                        self.model_.get_profile_shape(profile_index, index)
-                    )
-                    self.context.set_binding_shape(index, opt_shape)
-
-                if is_input and dtype == np.float16:
-                    self.fp16 = True
-
-                shape = tuple(self.context.get_binding_shape(index))
-            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
-                self.device
-            )
-            self.bindings[name] = Binding(
-                name, dtype, shape, data, int(data.data_ptr())
-            )
-
-        self.binding_addrs = OrderedDict(
-            (n, d.ptr) for n, d in self.bindings.items()
-        )
-
-    def forward(self, im_batch):
-        temp_im_batch = im_batch.clone()
-        batch_array = []
-        inp_batch = im_batch.shape[0]
-        out_batch = self.bindings["output"].shape[0]
-        resultant_features = []
-
-        # Divide batch to sub batches
-        while inp_batch > out_batch:
-            batch_array.append(temp_im_batch[:out_batch])
-            temp_im_batch = temp_im_batch[out_batch:]
-            inp_batch = temp_im_batch.shape[0]
-        if temp_im_batch.shape[0] > 0:
-            batch_array.append(temp_im_batch)
-
-        for temp_batch in batch_array:
-            # Adjust for dynamic shapes
-            if temp_batch.shape != self.bindings["images"].shape:
-                if self.is_trt10:
-                    self.context.set_input_shape("images", temp_batch.shape)
-                    self.bindings["images"] = self.bindings["images"]._replace(
-                        shape=temp_batch.shape
-                    )
-                    self.bindings["output"].data.resize_(
-                        tuple(self.context.get_tensor_shape("output"))
-                    )
-                else:
-                    i_in = self.model_.get_binding_index("images")
-                    i_out = self.model_.get_binding_index("output")
-                    self.context.set_binding_shape(i_in, temp_batch.shape)
-                    self.bindings["images"] = self.bindings["images"]._replace(
-                        shape=temp_batch.shape
-                    )
-                    output_shape = tuple(self.context.get_binding_shape(i_out))
-                    self.bindings["output"].data.resize_(output_shape)
-
-            s = self.bindings["images"].shape
-            assert temp_batch.shape == s, (
-                f"Input size {temp_batch.shape} does not match model size {s}"
-            )
-
-            self.binding_addrs["images"] = int(temp_batch.data_ptr())
-
-            # Execute inference
-            self.context.execute_v2(list(self.binding_addrs.values()))
-            features = self.bindings["output"].data
-            resultant_features.append(features.clone())
-
-        if len(resultant_features) == 1:
-            return resultant_features[0]
-        else:
-            rslt_features = torch.cat(resultant_features, dim=0)
-            rslt_features = rslt_features[: im_batch.shape[0]]
-            return rslt_features
-
-    def load_model(self, w):
-        print(f"Loading {w} for TensorRT inference...")
-        # self.checker.check_packages(("nvidia-tensorrt",))
-        try:
-            import tensorrt as trt  # TensorRT library
-        except ImportError:
-            raise ImportError("Please install tensorrt to use this backend.")
-
-        if self.device.type == "cpu":
-            if torch.cuda.is_available():
-                self.device = torch.device("cuda:0")
-            else:
-                raise ValueError(
-                    "CUDA device not available for TensorRT inference."
-                )
-
-        Binding = namedtuple(
-            "Binding", ("name", "dtype", "shape", "data", "ptr")
-        )
-        logger = trt.Logger(trt.Logger.INFO)
-
-        # Deserialize the engine
-        with open(w, "rb") as f, trt.Runtime(logger) as runtime:
-            self.model_ = runtime.deserialize_cuda_engine(f.read())
-
-        # Execution context
-        self.context = self.model_.create_execution_context()
-        self.bindings = OrderedDict()
-
-        self.is_trt10 = not hasattr(self.model_, "num_bindings")
-        num = (
-            range(self.model_.num_io_tensors)
-            if self.is_trt10
-            else range(self.model_.num_bindings)
-        )
-
-        # Parse bindings
-        for index in num:
-            if self.is_trt10:
-                name = self.model_.get_tensor_name(index)
-                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-                is_input = (
-                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-                )
-                if is_input and -1 in tuple(
-                    self.model_.get_tensor_shape(name)
-                ):
-                    self.context.set_input_shape(
-                        name,
-                        tuple(
-                            self.model_.get_tensor_profile_shape(name, 0)[1]
-                        ),
-                    )
-                if is_input and dtype == np.float16:
-                    self.fp16 = True
-
-                shape = tuple(self.context.get_tensor_shape(name))
-
-            else:
-                name = self.model_.get_binding_name(index)
-                dtype = trt.nptype(self.model_.get_binding_dtype(index))
-                is_input = self.model_.binding_is_input(index)
-
-                # Handle dynamic shapes
-                if is_input and -1 in self.model_.get_binding_shape(index):
-                    profile_index = 0
-                    min_shape, opt_shape, max_shape = (
-                        self.model_.get_profile_shape(profile_index, index)
-                    )
-                    self.context.set_binding_shape(index, opt_shape)
-
-                if is_input and dtype == np.float16:
-                    self.fp16 = True
-
-                shape = tuple(self.context.get_binding_shape(index))
-            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
-                self.device
-            )
-            self.bindings[name] = Binding(
-                name, dtype, shape, data, int(data.data_ptr())
-            )
-
-        self.binding_addrs = OrderedDict(
-            (n, d.ptr) for n, d in self.bindings.items()
-        )
-
-    def forward(self, im_batch):
-        temp_im_batch = im_batch.clone()
-        batch_array = []
-        inp_batch = im_batch.shape[0]
-        out_batch = self.bindings["output"].shape[0]
-        resultant_features = []
-
-        # Divide batch to sub batches
-        while inp_batch > out_batch:
-            batch_array.append(temp_im_batch[:out_batch])
-            temp_im_batch = temp_im_batch[out_batch:]
-            inp_batch = temp_im_batch.shape[0]
-        if temp_im_batch.shape[0] > 0:
-            batch_array.append(temp_im_batch)
-
-        for temp_batch in batch_array:
-            # Adjust for dynamic shapes
-            if temp_batch.shape != self.bindings["images"].shape:
-                if self.is_trt10:
-                    self.context.set_input_shape("images", temp_batch.shape)
-                    self.bindings["images"] = self.bindings["images"]._replace(
-                        shape=temp_batch.shape
-                    )
-                    self.bindings["output"].data.resize_(
-                        tuple(self.context.get_tensor_shape("output"))
-                    )
-                else:
-                    i_in = self.model_.get_binding_index("images")
-                    i_out = self.model_.get_binding_index("output")
-                    self.context.set_binding_shape(i_in, temp_batch.shape)
-                    self.bindings["images"] = self.bindings["images"]._replace(
-                        shape=temp_batch.shape
-                    )
-                    output_shape = tuple(self.context.get_binding_shape(i_out))
-                    self.bindings["output"].data.resize_(output_shape)
-
-            s = self.bindings["images"].shape
-            assert temp_batch.shape == s, (
-                f"Input size {temp_batch.shape} does not match model size {s}"
-            )
-
-            self.binding_addrs["images"] = int(temp_batch.data_ptr())
-
-            # Execute inference
-            self.context.execute_v2(list(self.binding_addrs.values()))
-            features = self.bindings["output"].data
-            resultant_features.append(features.clone())
-
-        if len(resultant_features) == 1:
-            return resultant_features[0]
-        else:
-            rslt_features = torch.cat(resultant_features, dim=0)
-            rslt_features = rslt_features[: im_batch.shape[0]]
-            return rslt_features
+	def __init__(self, engine_path, device=None, half=False):
+		"""Load a TensorRT engine from a local path or an http(s) URL (downloaded once into a cache dir)."""
+		import hashlib
+		import requests
+		engine_path = str(engine_path)  # callers such as ReidAutoBackend pass a Path, which has no .startswith
+		self.device = torch.device(device or ("cuda" if torch.cuda.is_available() else "cpu"))
+		self.half = half  # accepted so ReidAutoBackend's (weights, device, half) call does not raise TypeError
+		self.fp16 = False
+		self.model_ = self.context = self.bindings = self.binding_addrs = None
+		self.is_trt10 = False
+		if engine_path.startswith(("http://", "https://")):
+			# The cache file is named after a hash of the URL
+			engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
+			cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
+			os.makedirs(cache_dir, exist_ok=True)
+			cached_file = os.path.join(cache_dir, f"trt_engine_{engine_hash}.engine")
+			if not os.path.exists(cached_file):
+				print(f"[TensorRT] Downloading engine from {engine_path} to {cached_file}")
+				partial_file = cached_file + ".part"  # an interrupted download must never pose as a valid cache entry
+				with requests.get(engine_path, stream=True, timeout=30) as r:
+					r.raise_for_status()
+					with open(partial_file, 'wb') as f:
+						for chunk in r.iter_content(chunk_size=8192):
+							f.write(chunk)
+				os.replace(partial_file, cached_file)
+			else:
+				print(f"[TensorRT] Using cached engine at {cached_file}")
+			self.engine_path = cached_file
+		else:
+			self.engine_path = engine_path
+		self.load_model(self.engine_path)
+
+	def load_model(self, w):
+		print(f"Loading {w} for TensorRT inference...")
+		try:
+			import tensorrt as trt
+			import pycuda.driver as cuda
+			import pycuda.autoinit  # noqa: F401
+		except ImportError:
+			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH.")
+		# NOTE(review): the name `cuda` imported above is never used in this method.
+		if self.device.type == "cpu":
+			if torch.cuda.is_available():
+				self.device = torch.device("cuda:0")
+			else:
+				raise ValueError("CUDA device not available for TensorRT inference.")
+
+		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
+		logger = trt.Logger(trt.Logger.INFO)
+
+		# Read the serialized engine file `w` and deserialize it into self.model_
+		with open(w, "rb") as f:
+			with trt.Runtime(logger) as runtime:
+				self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+		# Execution context
+		self.context = self.model_.create_execution_context()
+		self.bindings = OrderedDict()
+		# An engine without `num_bindings` is treated as TensorRT 10 and queried through the tensor-name API
+		self.is_trt10 = not hasattr(self.model_, "num_bindings")
+		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
+
+		# Parse bindings: one Binding (name, dtype, shape, device buffer, pointer) per engine input/output
+		for index in num:
+			if self.is_trt10:
+				name = self.model_.get_tensor_name(index)
+				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
+					self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_tensor_shape(name))
+
+			else:
+				name = self.model_.get_binding_name(index)
+				dtype = trt.nptype(self.model_.get_binding_dtype(index))
+				is_input = self.model_.binding_is_input(index)
+
+				# Dynamic input (-1 in its shape): set the context to the opt shape of profile 0
+				if is_input and -1 in self.model_.get_binding_shape(index):
+					profile_index = 0
+					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
+					self.context.set_binding_shape(index, opt_shape)
+
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_binding_shape(index))
+			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
+			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+
+		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
+		# NOTE(review): everything below repeats the context/binding setup above line for line, so a
+		# second execution context is created and every binding buffer is allocated a second time.
+		self.context = self.model_.create_execution_context()
+		self.bindings = OrderedDict()
+
+		self.is_trt10 = not hasattr(self.model_, "num_bindings")
+		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
+
+		# Parse bindings
+		for index in num:
+			if self.is_trt10:
+				name = self.model_.get_tensor_name(index)
+				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
+						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_tensor_shape(name))
+
+			else:
+				name = self.model_.get_binding_name(index)
+				dtype = trt.nptype(self.model_.get_binding_dtype(index))
+				is_input = self.model_.binding_is_input(index)
+
+				# Handle dynamic shapes
+				if is_input and -1 in self.model_.get_binding_shape(index):
+					profile_index = 0
+					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
+					self.context.set_binding_shape(index, opt_shape)
+
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_binding_shape(index))
+			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
+			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+		# NOTE(review): another `def load_model` is added further down this hunk; if both sit in the same class, the later one replaces this one.
+		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
+
+	def forward(self, im_batch):
+		temp_im_batch = im_batch.clone()
+		batch_array = []
+		inp_batch = im_batch.shape[0]
+		out_batch = self.bindings["output"].shape[0]
+		resultant_features = []
+
+		# Divide batch to sub batches
+		while inp_batch > out_batch:
+			batch_array.append(temp_im_batch[:out_batch])
+			temp_im_batch = temp_im_batch[out_batch:]
+			inp_batch = temp_im_batch.shape[0]
+		if temp_im_batch.shape[0] > 0:
+			batch_array.append(temp_im_batch)
+
+		for temp_batch in batch_array:
+			# Adjust for dynamic shapes
+			if temp_batch.shape != self.bindings["images"].shape:
+				if self.is_trt10:
+					self.context.set_input_shape("images", temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
+				else:
+					i_in = self.model_.get_binding_index("images")
+					i_out = self.model_.get_binding_index("output")
+					self.context.set_binding_shape(i_in, temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					output_shape = tuple(self.context.get_binding_shape(i_out))
+					self.bindings["output"].data.resize_(output_shape)
+
+			s = self.bindings["images"].shape
+			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
+
+			self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+			# Execute inference
+			self.context.execute_v2(list(self.binding_addrs.values()))
+			features = self.bindings["output"].data
+			resultant_features.append(features.clone())
+
+		if len(resultant_features) == 1:
+			return resultant_features[0]
+		else:
+			rslt_features = torch.cat(resultant_features, dim=0)
+			rslt_features = rslt_features[: im_batch.shape[0]]
+			return rslt_features
+
+	def load_model(self, w):
+		print(f"Loading {w} for TensorRT inference...")
+		# self.checker.check_packages(("nvidia-tensorrt",))
+		try:
+			import tensorrt as trt  # TensorRT library
+		except ImportError:
+			raise ImportError("Please install tensorrt to use this backend.")
+
+		if self.device.type == "cpu":
+			if torch.cuda.is_available():
+				self.device = torch.device("cuda:0")
+			else:
+				raise ValueError("CUDA device not available for TensorRT inference.")
+
+		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
+		logger = trt.Logger(trt.Logger.INFO)
+
+		# Deserialize the engine
+		with open(w, "rb") as f:
+			with trt.Runtime(logger) as runtime:
+				self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+		# Execution context
+		self.context = self.model_.create_execution_context()
+		self.bindings = OrderedDict()
+
+		self.is_trt10 = not hasattr(self.model_, "num_bindings")
+		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
+
+		# Parse bindings
+		for index in num:
+			if self.is_trt10:
+				name = self.model_.get_tensor_name(index)
+				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
+						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_tensor_shape(name))
+
+			else:
+				name = self.model_.get_binding_name(index)
+				dtype = trt.nptype(self.model_.get_binding_dtype(index))
+				is_input = self.model_.binding_is_input(index)
+
+				# Handle dynamic shapes
+				if is_input and -1 in self.model_.get_binding_shape(index):
+					profile_index = 0
+					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
+					self.context.set_binding_shape(index, opt_shape)
+
+				if is_input and dtype == np.float16:
+					self.fp16 = True
+
+				shape = tuple(self.context.get_binding_shape(index))
+			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
+			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
+
+		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
+
+	def forward(self, im_batch):
+		temp_im_batch = im_batch.clone()
+		batch_array = []
+		inp_batch = im_batch.shape[0]
+		out_batch = self.bindings["output"].shape[0]
+		resultant_features = []
+
+		# Divide batch to sub batches
+		while inp_batch > out_batch:
+			batch_array.append(temp_im_batch[:out_batch])
+			temp_im_batch = temp_im_batch[out_batch:]
+			inp_batch = temp_im_batch.shape[0]
+		if temp_im_batch.shape[0] > 0:
+			batch_array.append(temp_im_batch)
+
+		for temp_batch in batch_array:
+			# Adjust for dynamic shapes
+			if temp_batch.shape != self.bindings["images"].shape:
+				if self.is_trt10:
+					self.context.set_input_shape("images", temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
+				else:
+					i_in = self.model_.get_binding_index("images")
+					i_out = self.model_.get_binding_index("output")
+					self.context.set_binding_shape(i_in, temp_batch.shape)
+					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
+					output_shape = tuple(self.context.get_binding_shape(i_out))
+					self.bindings["output"].data.resize_(output_shape)
+
+			s = self.bindings["images"].shape
+			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
+
+			self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+			# Execute inference
+			self.context.execute_v2(list(self.binding_addrs.values()))
+			features = self.bindings["output"].data
+			resultant_features.append(features.clone())
+
+		if len(resultant_features) == 1:
+			return resultant_features[0]
+		else:
+			rslt_features = torch.cat(resultant_features, dim=0)
+			rslt_features = rslt_features[: im_batch.shape[0]]
+			return rslt_features
diff --git a/ethology/reid/backends/tflite_backend.py b/ethology/reid/backends/tflite_backend.py
index eb10d4e8..b0a7b707 100644
--- a/ethology/reid/backends/tflite_backend.py
+++ b/ethology/reid/backends/tflite_backend.py
@@ -4,39 +4,37 @@
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
-
 # Note: LOGGER can be replaced with print or a local logger if needed
 
-
 class TFLiteBackend(BaseModelBackend):
-    """A class to handle TensorFlow Lite model inference with dynamic batch size support."""
-
-    def __init__(self, weights: Path, device: str, half: bool):
-        super().__init__(weights, device, half)
-        self.nhwc = True
-        self.half = False
-
-    def load_model(self, w):
-        # self.checker.check_packages(("tensorflow",))
-        print(f"Loading {str(w)} for TensorFlow Lite inference...")
-        import tensorflow as tf
-
-        self.interpreter = tf.lite.Interpreter(model_path=str(w))
-        self.interpreter.allocate_tensors()
-        self.input_details = self.interpreter.get_input_details()
-        self.output_details = self.interpreter.get_output_details()
-        self.current_allocated_batch_size = self.input_details[0]["shape"][0]
-
-    def forward(self, im_batch: torch.Tensor) -> np.ndarray:
-        im_batch = im_batch.cpu().numpy()
-        batch_size = im_batch.shape[0]
-        if batch_size != self.current_allocated_batch_size:
-            self.interpreter.resize_tensor_input(
-                self.input_details[0]["index"], [batch_size, 256, 128, 3]
-            )
-            self.interpreter.allocate_tensors()
-            self.current_allocated_batch_size = batch_size
-        self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
-        self.interpreter.invoke()
-        features = self.interpreter.get_tensor(self.output_details[0]["index"])
-        return features
+	"""
+	A class to handle TensorFlow Lite model inference with dynamic batch size support.
+	"""
+	def __init__(self, weights: Path, device: str, half: bool):
+		super().__init__(weights, device, half)
+		self.nhwc = True
+		self.half = False
+
+	def load_model(self, w):
+		# self.checker.check_packages(("tensorflow",))
+		print(f"Loading {str(w)} for TensorFlow Lite inference...")
+		import tensorflow as tf
+		self.interpreter = tf.lite.Interpreter(model_path=str(w))
+		self.interpreter.allocate_tensors()
+		self.input_details = self.interpreter.get_input_details()
+		self.output_details = self.interpreter.get_output_details()
+		self.current_allocated_batch_size = self.input_details[0]["shape"][0]
+
+	def forward(self, im_batch: torch.Tensor) -> np.ndarray:
+		im_batch = im_batch.cpu().numpy()
+		batch_size = im_batch.shape[0]
+		if batch_size != self.current_allocated_batch_size:
+			self.interpreter.resize_tensor_input(
+				self.input_details[0]["index"], [batch_size, 256, 128, 3]
+			)
+			self.interpreter.allocate_tensors()
+			self.current_allocated_batch_size = batch_size
+		self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
+		self.interpreter.invoke()
+		features = self.interpreter.get_tensor(self.output_details[0]["index"])
+		return features
diff --git a/ethology/reid/backends/torchscript_backend.py b/ethology/reid/backends/torchscript_backend.py
index 1142fcc4..b6602171 100644
--- a/ethology/reid/backends/torchscript_backend.py
+++ b/ethology/reid/backends/torchscript_backend.py
@@ -1,21 +1,20 @@
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
-
 # Note: LOGGER can be replaced with print or a local logger if needed
 
-
 class TorchscriptBackend(BaseModelBackend):
-    def __init__(self, weights, device, half):
-        super().__init__(weights, device, half)
-        self.nhwc = False
-        self.half = half
 
-    def load_model(self, w):
-        print(f"Loading {w} for TorchScript inference...")
-        self.model = torch.jit.load(w)
-        self.model.half() if self.half else self.model.float()
+	def __init__(self, weights, device, half):
+		super().__init__(weights, device, half)
+		self.nhwc = False
+		self.half = half
+
+	def load_model(self, w):
+		print(f"Loading {w} for TorchScript inference...")
+		self.model = torch.jit.load(w)
+		self.model.half() if self.half else self.model.float()
 
-    def forward(self, im_batch):
-        features = self.model(im_batch)
-        return features
+	def forward(self, im_batch):
+		features = self.model(im_batch)
+		return features
diff --git a/ethology/reid/core/auto_backend.py b/ethology/reid/core/auto_backend.py
index 22f2c4e2..6f43eba2 100644
--- a/ethology/reid/core/auto_backend.py
+++ b/ethology/reid/core/auto_backend.py
@@ -1,89 +1,74 @@
-from pathlib import Path
 
+from pathlib import Path
+from typing import Tuple, Union
 import torch
-
 from ethology.reid.backends.onnx_backend import ONNXBackend
 from ethology.reid.backends.openvino_backend import OpenVinoBackend
 from ethology.reid.backends.pytorch_backend import PyTorchBackend
-
 try:
-    from ethology.reid.backends.tensorrt_backend import TensorRTBackend
+	from ethology.reid.backends.tensorrt_backend import TensorRTBackend
 except ImportError:
-
-    class TensorRTBackend:
-        def __init__(self, *args, **kwargs):
-            raise ImportError(
-                "TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH."
-            )
-
-
+	class TensorRTBackend:
+		def __init__(self, *args, **kwargs):
+			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH.")
 from ethology.reid.backends.tflite_backend import TFLiteBackend
 from ethology.reid.backends.torchscript_backend import TorchscriptBackend
-
 # from ethology.reid.core import export_formats  # If needed, implement or copy export_formats
 # from ethology.utils import WEIGHTS  # If needed, implement or set WEIGHTS
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 # from ethology.utils.torch_utils import select_device  # If needed, implement or set select_device
 
-
 class ReidAutoBackend:
-    def __init__(
-        self,
-        weights: Path,
-        device: torch.device = torch.device("cpu"),
-        half: bool = False,
-    ):
-        super().__init__()
-        w = weights[0] if isinstance(weights, list) else weights
-        (
-            self.pt,
-            self.pth,
-            self.jit,
-            self.onnx,
-            self.xml,
-            self.engine,
-            self.tflite,
-        ) = self.model_type(w)
-        self.weights = weights
-        self.device = device  # For simplicity, skip select_device for now
-        self.half = half
-        self.model = self.get_backend()
+	def __init__(
+		self,
+		weights: Path,
+		device: torch.device = torch.device("cpu"),
+		half: bool = False,
+	):
+		super().__init__()
+		w = weights[0] if isinstance(weights, list) else weights
+		(
+			self.pt,
+			self.pth,
+			self.jit,
+			self.onnx,
+			self.xml,
+			self.engine,
+			self.tflite,
+		) = self.model_type(w)
+		self.weights = weights
+		self.device = device  # For simplicity, skip select_device for now
+		self.half = half
+		self.model = self.get_backend()
 
-    def get_backend(self):
-        backend_map = {
-            self.pt or self.pth: PyTorchBackend,
-            self.jit: TorchscriptBackend,
-            self.onnx: ONNXBackend,
-            self.engine: TensorRTBackend,
-            self.xml: OpenVinoBackend,
-            self.tflite: TFLiteBackend,
-        }
-        for condition, backend_class in backend_map.items():
-            if condition:
-                return backend_class(self.weights, self.device, self.half)
-        raise RuntimeError("This model framework is not supported yet!")
+	def get_backend(self):
+		"""Instantiate the backend whose format flag (set from ``model_type``) is true; raise if none is."""
+		# Ordered (flag, backend) pairs: a dict keyed by these booleans collapses to at most two entries.
+		# OpenVINO is listed before ONNX so an .xml model still resolves to OpenVINO if both flags are set.
+		candidates = (
+			(self.pt or self.pth, PyTorchBackend), (self.jit, TorchscriptBackend),
+			(self.xml, OpenVinoBackend), (self.onnx, ONNXBackend),
+			(self.engine, TensorRTBackend), (self.tflite, TFLiteBackend),
+		)
+		for condition, backend_class in candidates:
+			if condition:
+				return backend_class(self.weights, self.device, self.half)
+		raise RuntimeError("This model framework is not supported yet!")
 
-    def check_suffix(
-        self,
-        file: Path = "osnet_x0_25_msmt17.pt",
-        suffix: str | tuple[str, ...] = (".pt",),
-        msg: str = "",
-    ):
-        suffix = [suffix] if isinstance(suffix, str) else list(suffix)
-        files = [file] if isinstance(file, (str, Path)) else list(file)
-        for f in files:
-            file_suffix = Path(f).suffix.lower()
-            if file_suffix and file_suffix not in suffix:
-                print(
-                    f"File {f} does not have an acceptable suffix. Expected: {suffix}"
-                )
+	def check_suffix(self, file: Path = "osnet_x0_25_msmt17.pt", suffix: Union[str, Tuple[str, ...]] = (".pt",), msg: str = ""):
+		suffix = [suffix] if isinstance(suffix, str) else list(suffix)
+		files = [file] if isinstance(file, (str, Path)) else list(file)
+		for f in files:
+			file_suffix = Path(f).suffix.lower()
+			if file_suffix and file_suffix not in suffix:
+				print(f"File {f} does not have an acceptable suffix. Expected: {suffix}")
 
-    def model_type(self, p: Path) -> tuple[bool, ...]:
-        # For demo, just check for .pt
-        sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
-        self.check_suffix(p, sf)
-        types = [str(Path(p)).endswith(s) for s in sf]
-        # OpenVINO explicit check
-        if Path(p).suffix in [".xml", ".bin"]:
-            types[3] = True
-        return tuple(types)
+	def model_type(self, p: Path) -> Tuple[bool, ...]:
+		"""Return one flag per supported suffix, ordered (pt, pth, jit, onnx, xml, engine, tflite)."""
+		sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
+		self.check_suffix(p, sf)
+		types = [str(Path(p)).endswith(s) for s in sf]
+		# OpenVINO explicit check: an .xml or .bin file is flagged under the ".xml" slot
+		if Path(p).suffix in ['.xml', '.bin']:
+			types[sf.index(".xml")] = True  # was types[3], which is the ".onnx" slot
+		return tuple(types)
diff --git a/ethology/reid/core/config.py b/ethology/reid/core/config.py
index dc17cc14..926c0cc9 100644
--- a/ethology/reid/core/config.py
+++ b/ethology/reid/core/config.py
@@ -1,16 +1,16 @@
 MODEL_TYPES = [
-    "resnet50",
-    "resnet101",
-    "mlfn",
-    "hacnn",
-    "mobilenetv2_x1_0",
-    "mobilenetv2_x1_4",
-    "osnet_x1_0",
-    "osnet_x0_75",
-    "osnet_x0_5",
-    "osnet_x0_25",
-    "osnet_ibn_x1_0",
-    "osnet_ain_x1_0",
-    "lmbn_n",
-    "clip",
+	"resnet50",
+	"resnet101",
+	"mlfn",
+	"hacnn",
+	"mobilenetv2_x1_0",
+	"mobilenetv2_x1_4",
+	"osnet_x1_0",
+	"osnet_x0_75",
+	"osnet_x0_5",
+	"osnet_x0_25",
+	"osnet_ibn_x1_0",
+	"osnet_ain_x1_0",
+	"lmbn_n",
+	"clip",
 ]
diff --git a/ethology/reid/core/factory.py b/ethology/reid/core/factory.py
index 27406383..bc8b6ab1 100644
--- a/ethology/reid/core/factory.py
+++ b/ethology/reid/core/factory.py
@@ -1,44 +1,30 @@
+
 # Import model constructors from ethology's local backbones
 from ethology.reid.backbones.hacnn import HACNN
 from ethology.reid.backbones.mlfn import mlfn
-from ethology.reid.backbones.mobilenetv2 import (
-    mobilenetv2_x1_0,
-    mobilenetv2_x1_4,
-)
-from ethology.reid.backbones.osnet import (
-    osnet_ibn_x1_0,
-    osnet_x0_5,
-    osnet_x0_25,
-    osnet_x0_75,
-    osnet_x1_0,
-)
-from ethology.reid.backbones.osnet_ain import (
-    osnet_ain_x0_5,
-    osnet_ain_x0_25,
-    osnet_ain_x0_75,
-    osnet_ain_x1_0,
-)
+from ethology.reid.backbones.mobilenetv2 import mobilenetv2_x1_0, mobilenetv2_x1_4
+from ethology.reid.backbones.osnet import osnet_ibn_x1_0, osnet_x0_5, osnet_x0_25, osnet_x0_75, osnet_x1_0
+from ethology.reid.backbones.osnet_ain import osnet_ain_x0_5, osnet_ain_x0_25, osnet_ain_x0_75, osnet_ain_x1_0
 from ethology.reid.backbones.resnet import resnet50, resnet101
-
 # from ethology.reid.backbones.lmbn.lmbn_n import LMBN_n  # If present
 # from ethology.reid.backbones.clip.make_model import make_model  # If present
 
 MODEL_FACTORY = {
-    "resnet50": resnet50,
-    "resnet101": resnet101,
-    "mobilenetv2_x1_0": mobilenetv2_x1_0,
-    "mobilenetv2_x1_4": mobilenetv2_x1_4,
-    "hacnn": HACNN,
-    "mlfn": mlfn,
-    "osnet_x1_0": osnet_x1_0,
-    "osnet_x0_75": osnet_x0_75,
-    "osnet_x0_5": osnet_x0_5,
-    "osnet_x0_25": osnet_x0_25,
-    "osnet_ibn_x1_0": osnet_ibn_x1_0,
-    "osnet_ain_x1_0": osnet_ain_x1_0,
-    "osnet_ain_x0_75": osnet_ain_x0_75,
-    "osnet_ain_x0_5": osnet_ain_x0_5,
-    "osnet_ain_x0_25": osnet_ain_x0_25,
-    # "lmbn_n": LMBN_n,  # Uncomment if implemented
-    # "clip": make_model,  # Uncomment if implemented
+	"resnet50": resnet50,
+	"resnet101": resnet101,
+	"mobilenetv2_x1_0": mobilenetv2_x1_0,
+	"mobilenetv2_x1_4": mobilenetv2_x1_4,
+	"hacnn": HACNN,
+	"mlfn": mlfn,
+	"osnet_x1_0": osnet_x1_0,
+	"osnet_x0_75": osnet_x0_75,
+	"osnet_x0_5": osnet_x0_5,
+	"osnet_x0_25": osnet_x0_25,
+	"osnet_ibn_x1_0": osnet_ibn_x1_0,
+	"osnet_ain_x1_0": osnet_ain_x1_0,
+	"osnet_ain_x0_75": osnet_ain_x0_75,
+	"osnet_ain_x0_5": osnet_ain_x0_5,
+	"osnet_ain_x0_25": osnet_ain_x0_25,
+	# "lmbn_n": LMBN_n,  # Uncomment if implemented
+	# "clip": make_model,  # Uncomment if implemented
 }
diff --git a/ethology/reid/core/handler.py b/ethology/reid/core/handler.py
index ba521ab2..b5e51391 100644
--- a/ethology/reid/core/handler.py
+++ b/ethology/reid/core/handler.py
@@ -2,35 +2,32 @@
 
 # Thin wrapper to use BoxMOT ReID models in ethology
 from pathlib import Path
-
+from typing import Union
 import numpy as np
 
+
 # Import ethology's local ReID handler
 from ethology.reid.core.reid_handler import ReID as EthologyReID
 
-
 class ReIDHandler:
-    """Ethology ReID handler using local models and backends."""
-
-    def __init__(self, weights: str | Path, device="cpu", half=False):
+    """
+    Ethology ReID handler using local models and backends.
+    """
+    def __init__(self, weights: Union[str, Path], device='cpu', half=False):
         self.model = EthologyReID(weights=weights, device=device, half=half)
 
-    def extract_features(
-        self, frame: np.ndarray, dets: np.ndarray
-    ) -> np.ndarray:
-        """Extract feature embeddings for detections in a frame.
-
+    def extract_features(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
+        """
+        Extract feature embeddings for detections in a frame.
         Parameters
         ----------
         frame : np.ndarray
             (H, W, C) BGR image.
         dets : np.ndarray
             (N, 6) array of detections (x1, y1, x2, y2, conf, cls).
-
         Returns
         -------
         np.ndarray
             (N, D) feature embeddings.
-
         """
         return self.model(frame, dets)
diff --git a/ethology/reid/core/registry.py b/ethology/reid/core/registry.py
index 4b9c27fd..333cff2f 100644
--- a/ethology/reid/core/registry.py
+++ b/ethology/reid/core/registry.py
@@ -1,88 +1,71 @@
-from collections import OrderedDict
 
+from collections import OrderedDict
 import torch
-
-from ethology.reid.core.config import (
-    MODEL_TYPES,  # , NR_CLASSES_DICT, TRAINED_URLS
-)
+from ethology.reid.core.config import MODEL_TYPES  #, NR_CLASSES_DICT, TRAINED_URLS
 from ethology.reid.core.factory import MODEL_FACTORY
-
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 
-
 class ReIDModelRegistry:
-    """Encapsulates model registration and related utilities."""
+	"""Encapsulates model registration and related utilities."""
 
-    @staticmethod
-    def show_downloadable_models():
-        # LOGGER.info("Available .pt ReID models for automatic download")
-        # LOGGER.info(list(TRAINED_URLS.keys()))
-        pass
+	@staticmethod
+	def show_downloadable_models():
+		# LOGGER.info("Available .pt ReID models for automatic download")
+		# LOGGER.info(list(TRAINED_URLS.keys()))
+		pass
 
-    @staticmethod
-    def get_model_name(model):
-        for name in MODEL_TYPES:
-            if name in model.name:
-                return name
-        return None
+	@staticmethod
+	def get_model_name(model):
+		for name in MODEL_TYPES:
+			if name in model.name:
+				return name
+		return None
 
-    @staticmethod
-    def get_model_url(model):
-        # return TRAINED_URLS.get(model.name, None)
-        return None
+	@staticmethod
+	def get_model_url(model):
+		# return TRAINED_URLS.get(model.name, None)
+		return None
 
-    @staticmethod
-    def load_pretrained_weights(model, weight_path):
-        device = "cpu" if not torch.cuda.is_available() else None
-        checkpoint = torch.load(
-            weight_path,
-            map_location=torch.device("cpu") if device == "cpu" else None,
-            weights_only=False,
-            encoding="latin1",
-        )
-        state_dict = checkpoint.get("state_dict", checkpoint)
-        model_dict = model.state_dict()
-        new_state_dict = OrderedDict()
-        matched_layers, discarded_layers = [], []
-        for k, v in state_dict.items():
-            key = k[7:] if k.startswith("module.") else k
-            if key in model_dict and model_dict[key].size() == v.size():
-                new_state_dict[key] = v
-                matched_layers.append(key)
-            else:
-                discarded_layers.append(key)
-        model_dict.update(new_state_dict)
-        model.load_state_dict(model_dict)
+	@staticmethod
+	def load_pretrained_weights(model, weight_path):
+		device = "cpu" if not torch.cuda.is_available() else None
+		checkpoint = torch.load(
+			weight_path,
+			map_location=torch.device("cpu") if device == "cpu" else None,
+			weights_only=False,
+			encoding='latin1',
+		)
+		state_dict = checkpoint.get("state_dict", checkpoint)
+		model_dict = model.state_dict()
+		new_state_dict = OrderedDict()
+		matched_layers, discarded_layers = [], []
+		for k, v in state_dict.items():
+			key = k[7:] if k.startswith("module.") else k
+			if key in model_dict and model_dict[key].size() == v.size():
+				new_state_dict[key] = v
+				matched_layers.append(key)
+			else:
+				discarded_layers.append(key)
+		model_dict.update(new_state_dict)
+		model.load_state_dict(model_dict)
 
-    @staticmethod
-    def show_available_models():
-        # LOGGER.info("Available models:")
-        # LOGGER.info(list(MODEL_FACTORY.keys()))
-        pass
+	@staticmethod
+	def show_available_models():
+		# LOGGER.info("Available models:")
+		# LOGGER.info(list(MODEL_FACTORY.keys()))
+		pass
 
-    @staticmethod
-    def get_nr_classes(weights):
-        # dataset_key = weights.name.split("_")[1]
-        # return NR_CLASSES_DICT.get(dataset_key, 1)
-        return 1
+	@staticmethod
+	def get_nr_classes(weights):
+		# dataset_key = weights.name.split("_")[1]
+		# return NR_CLASSES_DICT.get(dataset_key, 1)
+		return 1
 
-    @staticmethod
-    def build_model(
-        name,
-        weights,
-        num_classes,
-        loss="softmax",
-        pretrained=True,
-        use_gpu=True,
-    ):
-        if name not in MODEL_FACTORY:
-            available = list(MODEL_FACTORY.keys())
-            raise KeyError(
-                f"Unknown model '{name}'. Must be one of {available}"
-            )
-        return MODEL_FACTORY[name](
-            num_classes=num_classes,
-            loss=loss,
-            pretrained=pretrained,
-            use_gpu=use_gpu,
-        )
+	@staticmethod
+	def build_model(name, weights, num_classes, loss="softmax", pretrained=True, use_gpu=True):
+		if name not in MODEL_FACTORY:
+			available = list(MODEL_FACTORY.keys())
+			raise KeyError(f"Unknown model '{name}'. Must be one of {available}")
+		return MODEL_FACTORY[name](
+			num_classes=num_classes, loss=loss, pretrained=pretrained, use_gpu=use_gpu
+		)
diff --git a/ethology/reid/core/reid_handler.py b/ethology/reid/core/reid_handler.py
index 62d42209..2c72658a 100644
--- a/ethology/reid/core/reid_handler.py
+++ b/ethology/reid/core/reid_handler.py
@@ -1,33 +1,28 @@
-from pathlib import Path
 
+from pathlib import Path
+from typing import Union
 import numpy as np
-
 from ethology.reid.core.auto_backend import ReidAutoBackend
 
-
 class ReID:
-    def __init__(self, weights: str | Path, device="cpu", half=False):
-        self.weights = Path(weights)
-        self.device = device
-        self.half = half
-        self.backend = ReidAutoBackend(
-            weights=self.weights, device=device, half=half
-        )
-        self.model = self.backend.model
-
-    def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
-        """Extract features for detections in a frame.
-
-        Args:
-                frame: (H, W, C) BGR image
-                dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
-
-        Returns:
-                embs: (N, D) embeddings.
-
-        """
-        if dets.shape[0] == 0:
-            return np.empty((0, 0))
-        xyxy = dets[:, :4]
-        embs = self.model.get_features(xyxy, frame)
-        return embs
+	def __init__(self, weights: Union[str, Path], device='cpu', half=False):
+		self.weights = Path(weights)
+		self.device = device
+		self.half = half
+		self.backend = ReidAutoBackend(weights=self.weights, device=device, half=half)
+		self.model = self.backend.model
+
+	def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
+		"""
+		Extract features for detections in a frame.
+		Args:
+			frame: (H, W, C) BGR image
+			dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
+		Returns:
+			embs: (N, D) embeddings.
+		"""
+		if dets.shape[0] == 0:
+			return np.empty((0, 0))
+		xyxy = dets[:, :4]
+		embs = self.model.get_features(xyxy, frame)
+		return embs
diff --git a/tests/test_unit/test_reid_handler.py b/tests/test_unit/test_reid_handler.py
index bc8a199c..3a5146cf 100644
--- a/tests/test_unit/test_reid_handler.py
+++ b/tests/test_unit/test_reid_handler.py
@@ -1,16 +1,12 @@
 import numpy as np
-
 from ethology.reid.core.handler import ReIDHandler
 
-
 def test_extract_features_shape():
-    handler = ReIDHandler(weights="osnet_x0_25_imagenet.pth")
+    handler = ReIDHandler(weights='osnet_x0_25_imagenet.pth')
     frame = np.random.randint(0, 255, (128, 64, 3), dtype=np.uint8)
-    dets = np.array(
-        [
-            [10, 10, 50, 100, 0.9, 1],
-            [60, 20, 100, 110, 0.8, 2],
-        ]
-    )
+    dets = np.array([
+        [10, 10, 50, 100, 0.9, 1],
+        [60, 20, 100, 110, 0.8, 2],
+    ])
     feats = handler.extract_features(frame, dets)
     assert feats.shape[0] == dets.shape[0]

From 04d9a06e914bb40d3505102458a0968c1a4108f4 Mon Sep 17 00:00:00 2001
From: AnandMayank <anandmayank698@gmail.com>
Date: Wed, 18 Feb 2026 20:08:13 +0530
Subject: [PATCH 08/12] docs(examples): add ReID trajectory utility example
 with MOT results

---
 examples/reid_mot_example.py | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)
 create mode 100644 examples/reid_mot_example.py

diff --git a/examples/reid_mot_example.py b/examples/reid_mot_example.py
new file mode 100644
index 00000000..3862e1dd
--- /dev/null
+++ b/examples/reid_mot_example.py
@@ -0,0 +1,23 @@
+"""
+Example: Using the new ReID trajectory utility with MOT results
+
+This script demonstrates how to use the ethology reid trajectory handler with a sample MOT output.
+"""
+
+from ethology.reid.core.reid_handler import ReIDTrajectoryHandler
+
+# Example: Dummy MOT results (replace with your actual MOT output)
+mot_results = [
+    {'id': 1, 'trajectory': [(0, 0), (1, 1), (2, 2)]},
+    {'id': 2, 'trajectory': [(5, 5), (6, 6), (7, 7)]},
+]
+
+# Initialize the handler (adjust parameters as needed)
+reid_handler = ReIDTrajectoryHandler(model_name='osnet', device='cpu')
+
+# Run re-identification on the MOT results
+reid_results = reid_handler.reidentify(mot_results)
+
+print('ReID Results:')
+for item in reid_results:
+    print(item)

From eff1f28d540188be94217547a9335a9fb1d4e1ca Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Feb 2026 14:39:07 +0000
Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/reid/backbones/hacnn.py              | 592 +++++++------
 ethology/reid/backbones/mlfn.py               |   6 +-
 ethology/reid/backbones/mobilenetv2.py        | 456 +++++-----
 ethology/reid/backbones/osnet.py              | 800 ++++++++++-------
 ethology/reid/backbones/osnet_ain.py          | 831 +++++++++++-------
 ethology/reid/backends/base_backend.py        |  48 +-
 ethology/reid/backends/onnx_backend.py        |  55 +-
 ethology/reid/backends/openvino_backend.py    |  83 +-
 ethology/reid/backends/pytorch_backend.py     |  28 +-
 ethology/reid/backends/tensorrt_backend.py    | 688 ++++++++-------
 ethology/reid/backends/tflite_backend.py      |  64 +-
 ethology/reid/backends/torchscript_backend.py |  25 +-
 ethology/reid/core/auto_backend.py            | 127 +--
 ethology/reid/core/config.py                  |  28 +-
 ethology/reid/core/factory.py                 |  56 +-
 ethology/reid/core/handler.py                 |  21 +-
 ethology/reid/core/registry.py                | 133 +--
 ethology/reid/core/reid_handler.py            |  51 +-
 examples/reid_mot_example.py                  |  11 +-
 tests/test_unit/test_reid_handler.py          |  14 +-
 20 files changed, 2359 insertions(+), 1758 deletions(-)

diff --git a/ethology/reid/backbones/hacnn.py b/ethology/reid/backbones/hacnn.py
index f3a65746..27771cf7 100644
--- a/ethology/reid/backbones/hacnn.py
+++ b/ethology/reid/backbones/hacnn.py
@@ -1,7 +1,5 @@
 """HACNN backbone for person re-identification."""
 
-from __future__ import absolute_import, division
-
 import torch
 from torch import nn
 from torch.nn import functional as F
@@ -10,298 +8,330 @@
 
 
 class ConvBlock(nn.Module):
-	def __init__(self, in_c, out_c, k, s=1, p=0):
-		"""Convolutional block with batch norm and ReLU."""
-		super().__init__()
-		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
-		self.bn = nn.BatchNorm2d(out_c)
-	def forward(self, x):
-		return F.relu(self.bn(self.conv(x)))
+    def __init__(self, in_c, out_c, k, s=1, p=0):
+        """Convolutional block with batch norm and ReLU."""
+        super().__init__()
+        self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p)
+        self.bn = nn.BatchNorm2d(out_c)
+
+    def forward(self, x):
+        return F.relu(self.bn(self.conv(x)))
+
 
 class InceptionA(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		"""InceptionA block."""
-		super().__init__()
-		mid_channels = out_channels // 4
-		self.stream1 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-		)
-		self.stream2 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-		)
-		self.stream3 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-		)
-		self.stream4 = nn.Sequential(
-			nn.AvgPool2d(3, stride=1, padding=1),
-			ConvBlock(in_channels, mid_channels, 1),
-		)
-	def forward(self, x):
-		s1 = self.stream1(x)
-		s2 = self.stream2(x)
-		s3 = self.stream3(x)
-		s4 = self.stream4(x)
-		y = torch.cat([s1, s2, s3, s4], dim=1)
-		return y
+    def __init__(self, in_channels, out_channels):
+        """InceptionA block."""
+        super().__init__()
+        mid_channels = out_channels // 4
+        self.stream1 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+        )
+        self.stream2 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+        )
+        self.stream3 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+        )
+        self.stream4 = nn.Sequential(
+            nn.AvgPool2d(3, stride=1, padding=1),
+            ConvBlock(in_channels, mid_channels, 1),
+        )
+
+    def forward(self, x):
+        s1 = self.stream1(x)
+        s2 = self.stream2(x)
+        s3 = self.stream3(x)
+        s4 = self.stream4(x)
+        y = torch.cat([s1, s2, s3, s4], dim=1)
+        return y
+
 
 class InceptionB(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		"""InceptionB block."""
-		super().__init__()
-		mid_channels = out_channels // 4
-		self.stream1 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
-		)
-		self.stream2 = nn.Sequential(
-			ConvBlock(in_channels, mid_channels, 1),
-			ConvBlock(mid_channels, mid_channels, 3, p=1),
-			ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
-		)
-		self.stream3 = nn.Sequential(
-			nn.MaxPool2d(3, stride=2, padding=1),
-			ConvBlock(in_channels, mid_channels * 2, 1),
-		)
-	def forward(self, x):
-		s1 = self.stream1(x)
-		s2 = self.stream2(x)
-		s3 = self.stream3(x)
-		y = torch.cat([s1, s2, s3], dim=1)
-		return y
+    def __init__(self, in_channels, out_channels):
+        """InceptionB block."""
+        super().__init__()
+        mid_channels = out_channels // 4
+        self.stream1 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+        )
+        self.stream2 = nn.Sequential(
+            ConvBlock(in_channels, mid_channels, 1),
+            ConvBlock(mid_channels, mid_channels, 3, p=1),
+            ConvBlock(mid_channels, mid_channels, 3, s=2, p=1),
+        )
+        self.stream3 = nn.Sequential(
+            nn.MaxPool2d(3, stride=2, padding=1),
+            ConvBlock(in_channels, mid_channels * 2, 1),
+        )
+
+    def forward(self, x):
+        s1 = self.stream1(x)
+        s2 = self.stream2(x)
+        s3 = self.stream3(x)
+        y = torch.cat([s1, s2, s3], dim=1)
+        return y
+
 
 class SpatialAttn(nn.Module):
-	def __init__(self):
-		"""Spatial attention block."""
-		super().__init__()
-		self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
-		self.conv2 = ConvBlock(1, 1, 1)
-	def forward(self, x):
-		x = x.mean(1, keepdim=True)
-		x = self.conv1(x)
-		x = F.interpolate(
-			x, (x.size(2) * 2, x.size(3) * 2), mode="bilinear", align_corners=True
-		)
-		x = self.conv2(x)
-		return x
+    def __init__(self):
+        """Spatial attention block."""
+        super().__init__()
+        self.conv1 = ConvBlock(1, 1, 3, s=2, p=1)
+        self.conv2 = ConvBlock(1, 1, 1)
+
+    def forward(self, x):
+        x = x.mean(1, keepdim=True)
+        x = self.conv1(x)
+        x = F.interpolate(
+            x,
+            (x.size(2) * 2, x.size(3) * 2),
+            mode="bilinear",
+            align_corners=True,
+        )
+        x = self.conv2(x)
+        return x
+
 
 class ChannelAttn(nn.Module):
-	def __init__(self, in_channels, reduction_rate=16):
-		"""Channel attention block."""
-		super().__init__()
-		assert in_channels % reduction_rate == 0
-		self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
-		self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
-	def forward(self, x):
-		x = F.avg_pool2d(x, x.size()[2:])
-		x = self.conv1(x)
-		x = self.conv2(x)
-		return x
+    def __init__(self, in_channels, reduction_rate=16):
+        """Channel attention block."""
+        super().__init__()
+        assert in_channels % reduction_rate == 0
+        self.conv1 = ConvBlock(in_channels, in_channels // reduction_rate, 1)
+        self.conv2 = ConvBlock(in_channels // reduction_rate, in_channels, 1)
+
+    def forward(self, x):
+        x = F.avg_pool2d(x, x.size()[2:])
+        x = self.conv1(x)
+        x = self.conv2(x)
+        return x
+
 
 class SoftAttn(nn.Module):
-	def __init__(self, in_channels):
-		"""Soft attention block."""
-		super().__init__()
-		self.spatial_attn = SpatialAttn()
-		self.channel_attn = ChannelAttn(in_channels)
-		self.conv = ConvBlock(in_channels, in_channels, 1)
-	def forward(self, x):
-		y_spatial = self.spatial_attn(x)
-		y_channel = self.channel_attn(x)
-		y = y_spatial * y_channel
-		y = torch.sigmoid(self.conv(y))
-		return y
+    def __init__(self, in_channels):
+        """Soft attention block."""
+        super().__init__()
+        self.spatial_attn = SpatialAttn()
+        self.channel_attn = ChannelAttn(in_channels)
+        self.conv = ConvBlock(in_channels, in_channels, 1)
+
+    def forward(self, x):
+        y_spatial = self.spatial_attn(x)
+        y_channel = self.channel_attn(x)
+        y = y_spatial * y_channel
+        y = torch.sigmoid(self.conv(y))
+        return y
+
 
 class HardAttn(nn.Module):
-	def __init__(self, in_channels):
-		"""Hard attention block."""
-		super().__init__()
-		self.fc = nn.Linear(in_channels, 4 * 2)
-		self.init_params()
-	def init_params(self):
-		self.fc.weight.data.zero_()
-		self.fc.bias.data.copy_(
-			torch.tensor([0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float)
-		)
-	def forward(self, x):
-		x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
-		theta = torch.tanh(self.fc(x))
-		theta = theta.view(-1, 4, 2)
-		return theta
+    def __init__(self, in_channels):
+        """Hard attention block."""
+        super().__init__()
+        self.fc = nn.Linear(in_channels, 4 * 2)
+        self.init_params()
+
+    def init_params(self):
+        self.fc.weight.data.zero_()
+        self.fc.bias.data.copy_(
+            torch.tensor(
+                [0, -0.75, 0, -0.25, 0, 0.25, 0, 0.75], dtype=torch.float
+            )
+        )
+
+    def forward(self, x):
+        x = F.avg_pool2d(x, x.size()[2:]).view(x.size(0), x.size(1))
+        theta = torch.tanh(self.fc(x))
+        theta = theta.view(-1, 4, 2)
+        return theta
+
 
 class HarmAttn(nn.Module):
-	def __init__(self, in_channels):
-		"""Harmonious attention block."""
-		super().__init__()
-		self.soft_attn = SoftAttn(in_channels)
-		self.hard_attn = HardAttn(in_channels)
-	def forward(self, x):
-		y_soft_attn = self.soft_attn(x)
-		theta = self.hard_attn(x)
-		return y_soft_attn, theta
+    def __init__(self, in_channels):
+        """Harmonious attention block."""
+        super().__init__()
+        self.soft_attn = SoftAttn(in_channels)
+        self.hard_attn = HardAttn(in_channels)
+
+    def forward(self, x):
+        y_soft_attn = self.soft_attn(x)
+        theta = self.hard_attn(x)
+        return y_soft_attn, theta
+
 
 class HACNN(nn.Module):
-	def __init__(
-		self,
-		num_classes,
-		loss="softmax",
-		nchannels=None,
-		feat_dim=512,
-		learn_region=True,
-		use_gpu=True,
-		**kwargs,
-	):
-		"""Harmonious Attention Convolutional Neural Network (HACNN) for person re-identification."""
-		super().__init__()
-		if nchannels is None:
-			nchannels = [128, 256, 384]
-		self.loss = loss
-		self.learn_region = learn_region
-		self.use_gpu = use_gpu
-		self.conv = ConvBlock(3, 32, 3, s=2, p=1)
-		self.inception1 = nn.Sequential(
-			InceptionA(32, nchannels[0]),
-			InceptionB(nchannels[0], nchannels[0]),
-		)
-		self.ha1 = HarmAttn(nchannels[0])
-		self.inception2 = nn.Sequential(
-			InceptionA(nchannels[0], nchannels[1]),
-			InceptionB(nchannels[1], nchannels[1]),
-		)
-		self.ha2 = HarmAttn(nchannels[1])
-		self.inception3 = nn.Sequential(
-			InceptionA(nchannels[1], nchannels[2]),
-			InceptionB(nchannels[2], nchannels[2]),
-		)
-		self.ha3 = HarmAttn(nchannels[2])
-		self.fc_global = nn.Sequential(
-			nn.Linear(nchannels[2], feat_dim),
-			nn.BatchNorm1d(feat_dim),
-			nn.ReLU(),
-		)
-		self.classifier_global = nn.Linear(feat_dim, num_classes)
-		if self.learn_region:
-			self.init_scale_factors()
-			self.local_conv1 = InceptionB(32, nchannels[0])
-			self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
-			self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
-			self.fc_local = nn.Sequential(
-				nn.Linear(nchannels[2] * 4, feat_dim),
-				nn.BatchNorm1d(feat_dim),
-				nn.ReLU(),
-			)
-			self.classifier_local = nn.Linear(feat_dim, num_classes)
-			self.feat_dim = feat_dim * 2
-		else:
-			self.feat_dim = feat_dim
-	def init_scale_factors(self):
-		"""Initialize scale factors for STN."""
-		self.scale_factors = []
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-		self.scale_factors.append(torch.tensor([[1, 0], [0, 0.25]], dtype=torch.float))
-	def stn(self, x, theta):
-		"""Spatial transformer network."""
-		grid = F.affine_grid(theta, x.size())
-		x = F.grid_sample(x, grid)
-		return x
-	def transform_theta(self, theta_i, region_idx):
-		"""Transform theta for a given region."""
-		scale_factors = self.scale_factors[region_idx]
-		theta = torch.zeros(theta_i.size(0), 2, 3)
-		theta[:, :, :2] = scale_factors
-		theta[:, :, -1] = theta_i
-		if self.use_gpu:
-			theta = theta.to(next(self.parameters()).device)
-		return theta
-	def forward(self, x):
-		"""Forward pass."""
-		assert (
-			x.size(2) == 160 and x.size(3) == 64
-		), (
-			f"Input size does not match, expected (160, 64) but got ({x.size(2)}, {x.size(3)})"
-		)
-		x = self.conv(x)
-		x1 = self.inception1(x)
-		x1_attn, x1_theta = self.ha1(x1)
-		x1_out = x1 * x1_attn
-		if self.learn_region:
-			x1_local_list = []
-			for region_idx in range(4):
-				x1_theta_i = x1_theta[:, region_idx, :]
-				x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
-				x1_trans_i = self.stn(x, x1_theta_i)
-				x1_trans_i = F.interpolate(
-					x1_trans_i, (24, 28), mode="bilinear", align_corners=True
-				)
-				x1_local_i = self.local_conv1(x1_trans_i)
-				x1_local_list.append(x1_local_i)
-		x2 = self.inception2(x1_out)
-		x2_attn, x2_theta = self.ha2(x2)
-		x2_out = x2 * x2_attn
-		if self.learn_region:
-			x2_local_list = []
-			for region_idx in range(4):
-				x2_theta_i = x2_theta[:, region_idx, :]
-				x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
-				x2_trans_i = self.stn(x1_out, x2_theta_i)
-				x2_trans_i = F.interpolate(
-					x2_trans_i, (12, 14), mode="bilinear", align_corners=True
-				)
-				x2_local_i = x2_trans_i + x1_local_list[region_idx]
-				x2_local_i = self.local_conv2(x2_local_i)
-				x2_local_list.append(x2_local_i)
-		x3 = self.inception3(x2_out)
-		x3_attn, x3_theta = self.ha3(x3)
-		x3_out = x3 * x3_attn
-		if self.learn_region:
-			x3_local_list = []
-			for region_idx in range(4):
-				x3_theta_i = x3_theta[:, region_idx, :]
-				x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
-				x3_trans_i = self.stn(x2_out, x3_theta_i)
-				x3_trans_i = F.interpolate(
-					x3_trans_i, (6, 7), mode="bilinear", align_corners=True
-				)
-				x3_local_i = x3_trans_i + x2_local_list[region_idx]
-				x3_local_i = self.local_conv3(x3_local_i)
-				x3_local_list.append(x3_local_i)
-		x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
-			x3_out.size(0), x3_out.size(1)
-		)
-		x_global = self.fc_global(x_global)
-		if self.learn_region:
-			x_local_list = []
-			for region_idx in range(4):
-				x_local_i = x3_local_list[region_idx]
-				x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
-					x_local_i.size(0), -1
-				)
-				x_local_list.append(x_local_i)
-			x_local = torch.cat(x_local_list, 1)
-			x_local = self.fc_local(x_local)
-		if not self.training:
-			if self.learn_region:
-				x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
-				x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
-				return torch.cat([x_global, x_local], 1)
-			else:
-				return x_global
-		prelogits_global = self.classifier_global(x_global)
-		if self.learn_region:
-			prelogits_local = self.classifier_local(x_local)
-		if self.loss == "softmax":
-			if self.learn_region:
-				return (prelogits_global, prelogits_local)
-			else:
-				return prelogits_global
-		elif self.loss == "triplet":
-			if self.learn_region:
-				return (prelogits_global, prelogits_local), (x_global, x_local)
-			else:
-				return prelogits_global, x_global
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(
+        self,
+        num_classes,  # output size of both classifier heads
+        loss="softmax",  # "softmax" or "triplet"; read by forward()
+        nchannels=None,  # channels of the 3 stages; None -> [128, 256, 384]
+        feat_dim=512,  # output size of fc_global / fc_local
+        learn_region=True,  # also build the local (region) branch
+        use_gpu=True,  # stored as-is; read by transform_theta()
+        **kwargs,  # accepted but not used in this constructor
+    ):
+        """Harmonious Attention Convolutional Neural Network (HACNN) for person re-identification."""
+        super().__init__()
+        if nchannels is None:
+            nchannels = [128, 256, 384]
+        self.loss = loss
+        self.learn_region = learn_region
+        self.use_gpu = use_gpu
+        self.conv = ConvBlock(3, 32, 3, s=2, p=1)  # stem: 3 -> 32, stride 2
+        self.inception1 = nn.Sequential(
+            InceptionA(32, nchannels[0]),
+            InceptionB(nchannels[0], nchannels[0]),
+        )
+        self.ha1 = HarmAttn(nchannels[0])  # attention after stage 1
+        self.inception2 = nn.Sequential(
+            InceptionA(nchannels[0], nchannels[1]),
+            InceptionB(nchannels[1], nchannels[1]),
+        )
+        self.ha2 = HarmAttn(nchannels[1])  # attention after stage 2
+        self.inception3 = nn.Sequential(
+            InceptionA(nchannels[1], nchannels[2]),
+            InceptionB(nchannels[2], nchannels[2]),
+        )
+        self.ha3 = HarmAttn(nchannels[2])  # attention after stage 3
+        self.fc_global = nn.Sequential(
+            nn.Linear(nchannels[2], feat_dim),
+            nn.BatchNorm1d(feat_dim),
+            nn.ReLU(),
+        )
+        self.classifier_global = nn.Linear(feat_dim, num_classes)
+        if self.learn_region:
+            self.init_scale_factors()
+            self.local_conv1 = InceptionB(32, nchannels[0])
+            self.local_conv2 = InceptionB(nchannels[0], nchannels[1])
+            self.local_conv3 = InceptionB(nchannels[1], nchannels[2])
+            self.fc_local = nn.Sequential(
+                nn.Linear(nchannels[2] * 4, feat_dim),  # 4 regions concatenated
+                nn.BatchNorm1d(feat_dim),
+                nn.ReLU(),
+            )
+            self.classifier_local = nn.Linear(feat_dim, num_classes)
+            self.feat_dim = feat_dim * 2  # global + local features
+        else:
+            self.feat_dim = feat_dim  # global features only
+
+    def init_scale_factors(self):
+        """Create the per-region scale matrices used by the STN.
+
+        ``self.scale_factors`` becomes a list of four separate float
+        tensors, one per region, each equal to ``[[1, 0], [0, 0.25]]``.
+        They are plain attributes, not registered buffers, so they are
+        not moved by ``Module.to()``.
+        """
+        # Every region shares the same values; transform_theta writes them
+        # into the left 2x2 block of that region's affine matrix.
+        n_regions = 4
+        base = [[1, 0], [0, 0.25]]
+        self.scale_factors = [
+            torch.tensor(base, dtype=torch.float) for _ in range(n_regions)
+        ]
+
+    def stn(self, x, theta):
+        """Warp ``x`` with the affine ``theta``; align_corners is explicit."""
+        grid = F.affine_grid(theta, x.size(), align_corners=False)
+        x = F.grid_sample(x, grid, align_corners=False)
+        return x
+
+    def transform_theta(self, theta_i, region_idx):
+        """Build the (B, 2, 3) affine matrix for one region from its shift."""
+        scale_factors = self.scale_factors[region_idx]
+        # Match theta_i's device/dtype so CPU, GPU and half inputs all work.
+        theta = theta_i.new_zeros(theta_i.size(0), 2, 3)
+        # Left 2x2 block: fixed scale. Last column: the (B, 2) shift theta_i.
+        theta[:, :, :2] = scale_factors.to(theta)
+        theta[:, :, -1] = theta_i
+        return theta
+
+    def forward(self, x):
+        """Return features in eval mode, logits (plus features) in training."""
+        assert x.size(2) == 160 and x.size(3) == 64, (
+            f"Input size does not match, expected (160, 64) but got ({x.size(2)}, {x.size(3)})"
+        )
+        x = self.conv(x)  # stem ConvBlock
+        x1 = self.inception1(x)
+        x1_attn, x1_theta = self.ha1(x1)  # soft attention map, hard-attn theta
+        x1_out = x1 * x1_attn
+        if self.learn_region:  # local branch, stage 1
+            x1_local_list = []
+            for region_idx in range(4):  # one pass per region
+                x1_theta_i = x1_theta[:, region_idx, :]
+                x1_theta_i = self.transform_theta(x1_theta_i, region_idx)
+                x1_trans_i = self.stn(x, x1_theta_i)  # sample from stem output
+                x1_trans_i = F.interpolate(
+                    x1_trans_i, (24, 28), mode="bilinear", align_corners=True
+                )
+                x1_local_i = self.local_conv1(x1_trans_i)
+                x1_local_list.append(x1_local_i)
+        x2 = self.inception2(x1_out)
+        x2_attn, x2_theta = self.ha2(x2)
+        x2_out = x2 * x2_attn
+        if self.learn_region:  # local branch, stage 2
+            x2_local_list = []
+            for region_idx in range(4):
+                x2_theta_i = x2_theta[:, region_idx, :]
+                x2_theta_i = self.transform_theta(x2_theta_i, region_idx)
+                x2_trans_i = self.stn(x1_out, x2_theta_i)
+                x2_trans_i = F.interpolate(
+                    x2_trans_i, (12, 14), mode="bilinear", align_corners=True
+                )
+                x2_local_i = x2_trans_i + x1_local_list[region_idx]  # add stage-1 local
+                x2_local_i = self.local_conv2(x2_local_i)
+                x2_local_list.append(x2_local_i)
+        x3 = self.inception3(x2_out)
+        x3_attn, x3_theta = self.ha3(x3)
+        x3_out = x3 * x3_attn
+        if self.learn_region:  # local branch, stage 3
+            x3_local_list = []
+            for region_idx in range(4):
+                x3_theta_i = x3_theta[:, region_idx, :]
+                x3_theta_i = self.transform_theta(x3_theta_i, region_idx)
+                x3_trans_i = self.stn(x2_out, x3_theta_i)
+                x3_trans_i = F.interpolate(
+                    x3_trans_i, (6, 7), mode="bilinear", align_corners=True
+                )
+                x3_local_i = x3_trans_i + x2_local_list[region_idx]  # add stage-2 local
+                x3_local_i = self.local_conv3(x3_local_i)
+                x3_local_list.append(x3_local_i)
+        x_global = F.avg_pool2d(x3_out, x3_out.size()[2:]).view(
+            x3_out.size(0), x3_out.size(1)
+        )
+        x_global = self.fc_global(x_global)
+        if self.learn_region:
+            x_local_list = []
+            for region_idx in range(4):
+                x_local_i = x3_local_list[region_idx]
+                x_local_i = F.avg_pool2d(x_local_i, x_local_i.size()[2:]).view(
+                    x_local_i.size(0), -1
+                )
+                x_local_list.append(x_local_i)
+            x_local = torch.cat(x_local_list, 1)  # concat the 4 region vectors
+            x_local = self.fc_local(x_local)
+        if not self.training:  # eval mode: return features, no classifier
+            if self.learn_region:
+                x_global = x_global / x_global.norm(p=2, dim=1, keepdim=True)
+                x_local = x_local / x_local.norm(p=2, dim=1, keepdim=True)
+                return torch.cat([x_global, x_local], 1)
+            else:
+                return x_global  # note: not L2-normalised in this branch
+        prelogits_global = self.classifier_global(x_global)
+        if self.learn_region:
+            prelogits_local = self.classifier_local(x_local)
+        if self.loss == "softmax":  # training: logits only
+            if self.learn_region:
+                return (prelogits_global, prelogits_local)
+            else:
+                return prelogits_global
+        elif self.loss == "triplet":  # training: logits and features
+            if self.learn_region:
+                return (prelogits_global, prelogits_local), (x_global, x_local)
+            else:
+                return prelogits_global, x_global
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
index 334bd1c8..3d04d003 100644
--- a/ethology/reid/backbones/mlfn.py
+++ b/ethology/reid/backbones/mlfn.py
@@ -1,7 +1,5 @@
-
 """MLFN backbone for person re-identification."""
 
-from __future__ import absolute_import, division
 import torch
 import torch.utils.model_zoo as model_zoo
 from torch import nn
@@ -9,8 +7,8 @@
 
 __all__ = ["mlfn"]
 model_urls = {
-	# training epoch = 5, top1 = 51.6
-	"imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
+    # training epoch = 5, top1 = 51.6
+    "imagenet": "https://mega.nz/#!YHxAhaxC!yu9E6zWl0x5zscSouTdbZu8gdFFytDdl-RAdD2DEfpk",
 }
 
 
diff --git a/ethology/reid/backbones/mobilenetv2.py b/ethology/reid/backbones/mobilenetv2.py
index 35a16219..b3e69186 100644
--- a/ethology/reid/backbones/mobilenetv2.py
+++ b/ethology/reid/backbones/mobilenetv2.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import torch.utils.model_zoo as model_zoo
 from torch import nn
@@ -9,239 +8,272 @@
 __all__ = ["mobilenetv2_x1_0", "mobilenetv2_x1_4"]
 
 model_urls = {
-	# 1.0: top-1 71.3
-	"mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
-	# 1.4: top-1 73.9
-	"mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
+    # 1.0: top-1 71.3
+    "mobilenetv2_x1_0": "https://mega.nz/#!NKp2wAIA!1NH1pbNzY_M2hVk_hdsxNM1NUOWvvGPHhaNr-fASF6c",
+    # 1.4: top-1 73.9
+    "mobilenetv2_x1_4": "https://mega.nz/#!RGhgEIwS!xN2s2ZdyqI6vQ3EwgmRXLEW3khr9tpXg96G9SUJugGk",
 }
 
 
 class ConvBlock(nn.Module):
-	"""Basic convolutional block.
+    """Basic convolutional block.
 
-	convolution (bias discarded) + batch normalization + relu6.
+    convolution (bias discarded) + batch normalization + relu6.
 
-	Args:
-		in_c (int): number of input channels.
-		out_c (int): number of output channels.
-		k (int or tuple): kernel size.
-		s (int or tuple): stride.
-		p (int or tuple): padding.
-		g (int): number of blocked connections from input channels
-			to output channels (default: 1).
-	"""
+    Args:
+            in_c (int): number of input channels.
+            out_c (int): number of output channels.
+            k (int or tuple): kernel size.
+            s (int or tuple): stride.
+            p (int or tuple): padding.
+            g (int): number of blocked connections from input channels
+                    to output channels (default: 1).
 
-	def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
-		super(ConvBlock, self).__init__()
-		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
-		self.bn = nn.BatchNorm2d(out_c)
+    """
 
-	def forward(self, x):
-		return F.relu6(self.bn(self.conv(x)))
+    def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
+        super(ConvBlock, self).__init__()
+        self.conv = nn.Conv2d(
+            in_c, out_c, k, stride=s, padding=p, bias=False, groups=g
+        )
+        self.bn = nn.BatchNorm2d(out_c)
+
+    def forward(self, x):
+        return F.relu6(self.bn(self.conv(x)))
 
 
 class Bottleneck(nn.Module):
-	def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
-		super(Bottleneck, self).__init__()
-		mid_channels = in_channels * expansion_factor
-		self.use_residual = stride == 1 and in_channels == out_channels
-		self.conv1 = ConvBlock(in_channels, mid_channels, 1)
-		self.dwconv2 = ConvBlock(
-			mid_channels, mid_channels, 3, stride, 1, g=mid_channels
-		)
-		self.conv3 = nn.Sequential(
-			nn.Conv2d(mid_channels, out_channels, 1, bias=False),
-			nn.BatchNorm2d(out_channels),
-		)
-
-	def forward(self, x):
-		m = self.conv1(x)
-		m = self.dwconv2(m)
-		m = self.conv3(m)
-		if self.use_residual:
-			return x + m
-		else:
-			return m
+    def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
+        super(Bottleneck, self).__init__()
+        mid_channels = in_channels * expansion_factor
+        self.use_residual = stride == 1 and in_channels == out_channels
+        self.conv1 = ConvBlock(in_channels, mid_channels, 1)
+        self.dwconv2 = ConvBlock(
+            mid_channels, mid_channels, 3, stride, 1, g=mid_channels
+        )
+        self.conv3 = nn.Sequential(
+            nn.Conv2d(mid_channels, out_channels, 1, bias=False),
+            nn.BatchNorm2d(out_channels),
+        )
+
+    def forward(self, x):
+        m = self.conv1(x)
+        m = self.dwconv2(m)
+        m = self.conv3(m)
+        if self.use_residual:
+            return x + m
+        else:
+            return m
 
 
 class MobileNetV2(nn.Module):
-	"""MobileNetV2.
-
-	Reference:
-		Sandler et al. MobileNetV2: Inverted Residuals and
-		Linear Bottlenecks. CVPR 2018.
-
-	Public keys:
-		- ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
-		- ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
-	"""
-
-	def __init__(
-		self,
-		num_classes,
-		width_mult=1,
-		loss="softmax",
-		fc_dims=None,
-		dropout_p=None,
-		**kwargs,
-	):
-		super(MobileNetV2, self).__init__()
-		self.loss = loss
-		self.in_channels = int(32 * width_mult)
-		self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
-
-		# construct layers
-		self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
-		self.conv2 = self._make_layer(Bottleneck, 1, int(16 * width_mult), 1, 1)
-		self.conv3 = self._make_layer(Bottleneck, 6, int(24 * width_mult), 2, 2)
-		self.conv4 = self._make_layer(Bottleneck, 6, int(32 * width_mult), 3, 2)
-		self.conv5 = self._make_layer(Bottleneck, 6, int(64 * width_mult), 4, 2)
-		self.conv6 = self._make_layer(Bottleneck, 6, int(96 * width_mult), 3, 1)
-		self.conv7 = self._make_layer(Bottleneck, 6, int(160 * width_mult), 3, 2)
-		self.conv8 = self._make_layer(Bottleneck, 6, int(320 * width_mult), 1, 1)
-		self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
-
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc = self._construct_fc_layer(fc_dims, self.feature_dim, dropout_p)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-
-		self._init_params()
-
-	def _make_layer(self, block, t, c, n, s):
-		# t: expansion factor
-		# c: output channels
-		# n: number of blocks
-		# s: stride for first layer
-		layers = []
-		layers.append(block(self.in_channels, c, t, s))
-		self.in_channels = c
-		for i in range(1, n):
-			layers.append(block(self.in_channels, c, t))
-		return nn.Sequential(*layers)
-
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		"""Constructs fully connected layer.
-
-		Args:
-			fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
-			input_dim (int): input dimension
-			dropout_p (float): dropout probability, if None, dropout is unused
-		"""
-		if fc_dims is None:
-			self.feature_dim = input_dim
-			return None
-
-		assert isinstance(
-			fc_dims, (list, tuple)
-		), "fc_dims must be either list or tuple, but got {}".format(type(fc_dims))
-
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU(inplace=True))
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-
-		self.feature_dim = fc_dims[-1]
-
-		return nn.Sequential(*layers)
-
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.conv2(x)
-		x = self.conv3(x)
-		x = self.conv4(x)
-		x = self.conv5(x)
-		x = self.conv6(x)
-		x = self.conv7(x)
-		x = self.conv8(x)
-		x = self.conv9(x)
-		return x
-
-	def forward(self, x):
-		f = self.featuremaps(x)
-		v = self.global_avgpool(f)
-		v = v.view(v.size(0), -1)
-
-		if self.fc is not None:
-			v = self.fc(v)
-
-		if not self.training:
-			return v
-
-		y = self.classifier(v)
-
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    """MobileNetV2.
+
+    Reference:
+            Sandler et al. MobileNetV2: Inverted Residuals and
+            Linear Bottlenecks. CVPR 2018.
+
+    Public keys:
+            - ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
+            - ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
+    """
+
+    def __init__(
+        self,
+        num_classes,
+        width_mult=1,
+        loss="softmax",
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    ):
+        super(MobileNetV2, self).__init__()
+        self.loss = loss
+        self.in_channels = int(32 * width_mult)
+        self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
+
+        # construct layers
+        self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
+        self.conv2 = self._make_layer(
+            Bottleneck, 1, int(16 * width_mult), 1, 1
+        )
+        self.conv3 = self._make_layer(
+            Bottleneck, 6, int(24 * width_mult), 2, 2
+        )
+        self.conv4 = self._make_layer(
+            Bottleneck, 6, int(32 * width_mult), 3, 2
+        )
+        self.conv5 = self._make_layer(
+            Bottleneck, 6, int(64 * width_mult), 4, 2
+        )
+        self.conv6 = self._make_layer(
+            Bottleneck, 6, int(96 * width_mult), 3, 1
+        )
+        self.conv7 = self._make_layer(
+            Bottleneck, 6, int(160 * width_mult), 3, 2
+        )
+        self.conv8 = self._make_layer(
+            Bottleneck, 6, int(320 * width_mult), 1, 1
+        )
+        self.conv9 = ConvBlock(self.in_channels, self.feature_dim, 1)
+
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc = self._construct_fc_layer(
+            fc_dims, self.feature_dim, dropout_p
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+
+        self._init_params()
+
+    def _make_layer(self, block, t, c, n, s):
+        # t: expansion factor
+        # c: output channels
+        # n: number of blocks
+        # s: stride for first layer
+        layers = []
+        layers.append(block(self.in_channels, c, t, s))
+        self.in_channels = c
+        for i in range(1, n):
+            layers.append(block(self.in_channels, c, t))
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        """Constructs fully connected layer.
+
+        Args:
+                fc_dims (list or tuple): dimensions of fc layers, if None, no fc layers are constructed
+                input_dim (int): input dimension
+                dropout_p (float): dropout probability, if None, dropout is unused
+
+        """
+        if fc_dims is None:
+            self.feature_dim = input_dim
+            return None
+
+        assert isinstance(fc_dims, (list, tuple)), (
+            f"fc_dims must be either list or tuple, but got {type(fc_dims)}"
+        )
+
+        layers = []
+        for dim in fc_dims:
+            layers.append(nn.Linear(input_dim, dim))
+            layers.append(nn.BatchNorm1d(dim))
+            layers.append(nn.ReLU(inplace=True))
+            if dropout_p is not None:
+                layers.append(nn.Dropout(p=dropout_p))
+            input_dim = dim
+
+        self.feature_dim = fc_dims[-1]
+
+        return nn.Sequential(*layers)
+
+    def _init_params(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(
+                m, nn.BatchNorm1d
+            ):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def featuremaps(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        x = self.conv6(x)
+        x = self.conv7(x)
+        x = self.conv8(x)
+        x = self.conv9(x)
+        return x
+
+    def forward(self, x):
+        f = self.featuremaps(x)
+        v = self.global_avgpool(f)
+        v = v.view(v.size(0), -1)
+
+        if self.fc is not None:
+            v = self.fc(v)
+
+        if not self.training:
+            return v
+
+        y = self.classifier(v)
+
+        if self.loss == "softmax":
+            return y
+        elif self.loss == "triplet":
+            return y, v
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
 
 
 def init_pretrained_weights(model, model_url):
-	"""Initializes model with pretrained weights.
+    """Initializes model with pretrained weights.
 
-	Layers that don't match with pretrained layers in name or size are kept unchanged.
-	"""
-	pretrain_dict = model_zoo.load_url(model_url)
-	model_dict = model.state_dict()
-	pretrain_dict = {
-		k: v
-		for k, v in pretrain_dict.items()
-		if k in model_dict and model_dict[k].size() == v.size()
-	}
-	model_dict.update(pretrain_dict)
-	model.load_state_dict(model_dict)
+    Layers that don't match with pretrained layers in name or size are kept unchanged.
+    """
+    pretrain_dict = model_zoo.load_url(model_url)
+    model_dict = model.state_dict()
+    pretrain_dict = {
+        k: v
+        for k, v in pretrain_dict.items()
+        if k in model_dict and model_dict[k].size() == v.size()
+    }
+    model_dict.update(pretrain_dict)
+    model.load_state_dict(model_dict)
 
 
 def mobilenetv2_x1_0(num_classes, loss, pretrained=True, **kwargs):
-	model = MobileNetV2(
-		num_classes, loss=loss, width_mult=1, fc_dims=None, dropout_p=None, **kwargs
-	)
-	if pretrained:
-		# init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
-		import warnings
-
-		warnings.warn(
-			"The imagenet pretrained weights need to be manually downloaded from {}".format(
-				model_urls["mobilenetv2_x1_0"]
-			)
-		)
-	return model
+    model = MobileNetV2(
+        num_classes,
+        loss=loss,
+        width_mult=1,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_0'])
+        import warnings
+
+        warnings.warn(
+            "The imagenet pretrained weights need to be manually downloaded from {}".format(
+                model_urls["mobilenetv2_x1_0"]
+            )
+        )
+    return model
 
 
 def mobilenetv2_x1_4(num_classes, loss, pretrained=True, **kwargs):
-	model = MobileNetV2(
-		num_classes, loss=loss, width_mult=1.4, fc_dims=None, dropout_p=None, **kwargs
-	)
-	if pretrained:
-		# init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
-		import warnings
-
-		warnings.warn(
-			"The imagenet pretrained weights need to be manually downloaded from {}".format(
-				model_urls["mobilenetv2_x1_4"]
-			)
-		)
-	return model
+    model = MobileNetV2(
+        num_classes,
+        loss=loss,
+        width_mult=1.4,
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    )
+    if pretrained:
+        # init_pretrained_weights(model, model_urls['mobilenetv2_x1_4'])
+        import warnings
+
+        warnings.warn(
+            "The imagenet pretrained weights need to be manually downloaded from {}".format(
+                model_urls["mobilenetv2_x1_4"]
+            )
+        )
+    return model
+
+
 # Copied from boxmot/boxmot/reid/backbones/mobilenetv2.py
diff --git a/ethology/reid/backbones/osnet.py b/ethology/reid/backbones/osnet.py
index c07e4e45..c13dd5b7 100644
--- a/ethology/reid/backbones/osnet.py
+++ b/ethology/reid/backbones/osnet.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import warnings
 
@@ -8,338 +7,529 @@
 from torch import nn
 from torch.nn import functional as F
 
-__all__ = ["osnet_x1_0", "osnet_x0_75", "osnet_x0_5", "osnet_x0_25", "osnet_ibn_x1_0"]
+__all__ = [
+    "osnet_x1_0",
+    "osnet_x0_75",
+    "osnet_x0_5",
+    "osnet_x0_25",
+    "osnet_ibn_x1_0",
+]
 
 pretrained_urls = {
-	"osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
-	"osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
-	"osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
-	"osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
-	"osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
+    "osnet_x1_0": "https://drive.google.com/uc?id=1LaG1EJpHrxdAxKnSCJ_i0u-nbxSAeiFY",
+    "osnet_x0_75": "https://drive.google.com/uc?id=1uwA9fElHOk3ZogwbeY5GkLI6QPTX70Hq",
+    "osnet_x0_5": "https://drive.google.com/uc?id=16DGLbZukvVYgINws8u8deSaOqjybZ83i",
+    "osnet_x0_25": "https://drive.google.com/uc?id=1rb8UN5ZzPKRc_xvtHlyDh-cSz88YX9hs",
+    "osnet_ibn_x1_0": "https://drive.google.com/uc?id=1sr90V6irlYYDd4_4ISU2iruoRG8J__6l",
 }
 
 # ...existing code for ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, ChannelGate, OSBlock...
 
+
 class ConvLayer(nn.Module):
-	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
-		super(ConvLayer, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups)
-		if IN:
-			self.bn = nn.InstanceNorm2d(out_channels, affine=True)
-		else:
-			self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        groups=1,
+        IN=False,
+    ):
+        super(ConvLayer, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            bias=False,
+            groups=groups,
+        )
+        if IN:
+            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
+        else:
+            self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class Conv1x1(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv1x1, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv1x1, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            1,
+            stride=stride,
+            padding=0,
+            bias=False,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class Conv1x1Linear(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1):
-		super(Conv1x1Linear, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
-		self.bn = nn.BatchNorm2d(out_channels)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1):
+        super(Conv1x1Linear, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return x
+
 
 class Conv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv3x3, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv3x3, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            3,
+            stride=stride,
+            padding=1,
+            bias=False,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class LightConv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		super(LightConv3x3, self).__init__()
-		self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
-		self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU(inplace=True)
-	def forward(self, x):
-		x = self.conv1(x)
-		x = self.conv2(x)
-		x = self.bn(x)
-		x = self.relu(x)
-		return x
+    def __init__(self, in_channels, out_channels):
+        super(LightConv3x3, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels, out_channels, 1, stride=1, padding=0, bias=False
+        )
+        self.conv2 = nn.Conv2d(
+            out_channels,
+            out_channels,
+            3,
+            stride=1,
+            padding=1,
+            bias=False,
+            groups=out_channels,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU(inplace=True)
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.bn(x)
+        x = self.relu(x)
+        return x
+
 
 class ChannelGate(nn.Module):
-	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
-		super(ChannelGate, self).__init__()
-		if num_gates is None:
-			num_gates = in_channels
-		self.return_gates = return_gates
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0)
-		self.norm1 = None
-		if layer_norm:
-			self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
-		self.relu = nn.ReLU(inplace=True)
-		self.fc2 = nn.Conv2d(in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0)
-		if gate_activation == "sigmoid":
-			self.gate_activation = nn.Sigmoid()
-		elif gate_activation == "relu":
-			self.gate_activation = nn.ReLU(inplace=True)
-		elif gate_activation == "linear":
-			self.gate_activation = None
-		else:
-			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
-	def forward(self, x):
-		input = x
-		x = self.global_avgpool(x)
-		x = self.fc1(x)
-		if self.norm1 is not None:
-			x = self.norm1(x)
-		x = self.relu(x)
-		x = self.fc2(x)
-		if self.gate_activation is not None:
-			x = self.gate_activation(x)
-		if self.return_gates:
-			return x
-		return input * x
+    def __init__(
+        self,
+        in_channels,
+        num_gates=None,
+        return_gates=False,
+        gate_activation="sigmoid",
+        reduction=16,
+        layer_norm=False,
+    ):
+        super(ChannelGate, self).__init__()
+        if num_gates is None:
+            num_gates = in_channels
+        self.return_gates = return_gates
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc1 = nn.Conv2d(
+            in_channels,
+            in_channels // reduction,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        self.norm1 = None
+        if layer_norm:
+            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
+        self.relu = nn.ReLU(inplace=True)
+        self.fc2 = nn.Conv2d(
+            in_channels // reduction,
+            num_gates,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        if gate_activation == "sigmoid":
+            self.gate_activation = nn.Sigmoid()
+        elif gate_activation == "relu":
+            self.gate_activation = nn.ReLU(inplace=True)
+        elif gate_activation == "linear":
+            self.gate_activation = None
+        else:
+            raise RuntimeError(f"Unknown gate activation: {gate_activation}")
+
+    def forward(self, x):
+        input = x
+        x = self.global_avgpool(x)
+        x = self.fc1(x)
+        if self.norm1 is not None:
+            x = self.norm1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        if self.gate_activation is not None:
+            x = self.gate_activation(x)
+        if self.return_gates:
+            return x
+        return input * x
+
 
 class OSBlock(nn.Module):
-	def __init__(self, in_channels, out_channels, IN=False, bottleneck_reduction=4, **kwargs):
-		super(OSBlock, self).__init__()
-		mid_channels = out_channels // bottleneck_reduction
-		self.conv1 = Conv1x1(in_channels, mid_channels)
-		self.conv2a = LightConv3x3(mid_channels, mid_channels)
-		self.conv2b = nn.Sequential(
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-		)
-		self.conv2c = nn.Sequential(
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-		)
-		self.conv2d = nn.Sequential(
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-			LightConv3x3(mid_channels, mid_channels),
-		)
-		self.gate = ChannelGate(mid_channels)
-		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
-		self.downsample = None
-		if in_channels != out_channels:
-			self.downsample = Conv1x1Linear(in_channels, out_channels)
-		self.IN = None
-		if IN:
-			self.IN = nn.InstanceNorm2d(out_channels, affine=True)
-	def forward(self, x):
-		identity = x
-		x1 = self.conv1(x)
-		x2a = self.conv2a(x1)
-		x2b = self.conv2b(x1)
-		x2c = self.conv2c(x1)
-		x2d = self.conv2d(x1)
-		x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
-		x3 = self.conv3(x2)
-		if self.downsample is not None:
-			identity = self.downsample(identity)
-		out = x3 + identity
-		if self.IN is not None:
-			out = self.IN(out)
-		return F.relu(out)
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        IN=False,
+        bottleneck_reduction=4,
+        **kwargs,
+    ):
+        super(OSBlock, self).__init__()
+        mid_channels = out_channels // bottleneck_reduction
+        self.conv1 = Conv1x1(in_channels, mid_channels)
+        self.conv2a = LightConv3x3(mid_channels, mid_channels)
+        self.conv2b = nn.Sequential(
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+        )
+        self.conv2c = nn.Sequential(
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+        )
+        self.conv2d = nn.Sequential(
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+            LightConv3x3(mid_channels, mid_channels),
+        )
+        self.gate = ChannelGate(mid_channels)
+        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+        self.downsample = None
+        if in_channels != out_channels:
+            self.downsample = Conv1x1Linear(in_channels, out_channels)
+        self.IN = None
+        if IN:
+            self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+
+    def forward(self, x):
+        identity = x
+        x1 = self.conv1(x)
+        x2a = self.conv2a(x1)
+        x2b = self.conv2b(x1)
+        x2c = self.conv2c(x1)
+        x2d = self.conv2d(x1)
+        x2 = self.gate(x2a) + self.gate(x2b) + self.gate(x2c) + self.gate(x2d)
+        x3 = self.conv3(x2)
+        if self.downsample is not None:
+            identity = self.downsample(identity)
+        out = x3 + identity
+        if self.IN is not None:
+            out = self.IN(out)
+        return F.relu(out)
+
 
 class OSNet(nn.Module):
-	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", IN=False, **kwargs):
-		super(OSNet, self).__init__()
-		num_blocks = len(blocks)
-		assert num_blocks == len(layers)
-		assert num_blocks == len(channels) - 1
-		self.loss = loss
-		self.feature_dim = feature_dim
-		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
-		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1], reduce_spatial_size=True, IN=IN)
-		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2], reduce_spatial_size=True)
-		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3], reduce_spatial_size=False)
-		self.conv5 = Conv1x1(channels[3], channels[3])
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-		self._init_params()
-	def _make_layer(self, block, layer, in_channels, out_channels, reduce_spatial_size, IN=False):
-		layers = []
-		layers.append(block(in_channels, out_channels, IN=IN))
-		for i in range(1, layer):
-			layers.append(block(out_channels, out_channels, IN=IN))
-		if reduce_spatial_size:
-			layers.append(nn.Sequential(Conv1x1(out_channels, out_channels), nn.AvgPool2d(2, stride=2)))
-		return nn.Sequential(*layers)
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		if fc_dims is None or fc_dims < 0:
-			self.feature_dim = input_dim
-			return None
-		if isinstance(fc_dims, int):
-			fc_dims = [fc_dims]
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU(inplace=True))
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-		self.feature_dim = fc_dims[-1]
-		return nn.Sequential(*layers)
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.maxpool(x)
-		x = self.conv2(x)
-		x = self.conv3(x)
-		x = self.conv4(x)
-		x = self.conv5(x)
-		return x
-	def forward(self, x, return_featuremaps=False):
-		x = self.featuremaps(x)
-		if return_featuremaps:
-			return x
-		v = self.global_avgpool(x)
-		v = v.view(v.size(0), -1)
-		if self.fc is not None:
-			v = self.fc(v)
-		if not self.training:
-			return v
-		y = self.classifier(v)
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(
+        self,
+        num_classes,
+        blocks,
+        layers,
+        channels,
+        feature_dim=512,
+        loss="softmax",
+        IN=False,
+        **kwargs,
+    ):
+        super(OSNet, self).__init__()
+        num_blocks = len(blocks)
+        assert num_blocks == len(layers)
+        assert num_blocks == len(channels) - 1
+        self.loss = loss
+        self.feature_dim = feature_dim
+        self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=IN)
+        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+        self.conv2 = self._make_layer(
+            blocks[0],
+            layers[0],
+            channels[0],
+            channels[1],
+            reduce_spatial_size=True,
+            IN=IN,
+        )
+        self.conv3 = self._make_layer(
+            blocks[1],
+            layers[1],
+            channels[1],
+            channels[2],
+            reduce_spatial_size=True,
+        )
+        self.conv4 = self._make_layer(
+            blocks[2],
+            layers[2],
+            channels[2],
+            channels[3],
+            reduce_spatial_size=False,
+        )
+        self.conv5 = Conv1x1(channels[3], channels[3])
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc = self._construct_fc_layer(
+            self.feature_dim, channels[3], dropout_p=None
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+        self._init_params()
+
+    def _make_layer(
+        self,
+        block,
+        layer,
+        in_channels,
+        out_channels,
+        reduce_spatial_size,
+        IN=False,
+    ):
+        layers = []
+        layers.append(block(in_channels, out_channels, IN=IN))
+        for i in range(1, layer):
+            layers.append(block(out_channels, out_channels, IN=IN))
+        if reduce_spatial_size:
+            layers.append(
+                nn.Sequential(
+                    Conv1x1(out_channels, out_channels),
+                    nn.AvgPool2d(2, stride=2),
+                )
+            )
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        if fc_dims is None or fc_dims < 0:
+            self.feature_dim = input_dim
+            return None
+        if isinstance(fc_dims, int):
+            fc_dims = [fc_dims]
+        layers = []
+        for dim in fc_dims:
+            layers.append(nn.Linear(input_dim, dim))
+            layers.append(nn.BatchNorm1d(dim))
+            layers.append(nn.ReLU(inplace=True))
+            if dropout_p is not None:
+                layers.append(nn.Dropout(p=dropout_p))
+            input_dim = dim
+        self.feature_dim = fc_dims[-1]
+        return nn.Sequential(*layers)
+
+    def _init_params(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.BatchNorm2d) or isinstance(
+                m, nn.BatchNorm1d
+            ):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def featuremaps(self, x):
+        x = self.conv1(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.conv3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        return x
+
+    def forward(self, x, return_featuremaps=False):
+        x = self.featuremaps(x)
+        if return_featuremaps:
+            return x
+        v = self.global_avgpool(x)
+        v = v.view(v.size(0), -1)
+        if self.fc is not None:
+            v = self.fc(v)
+        if not self.training:
+            return v
+        y = self.classifier(v)
+        if self.loss == "softmax":
+            return y
+        elif self.loss == "triplet":
+            return y, v
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
+
 
 def init_pretrained_weights(model, key=""):
-	import errno
-	import os
-	from collections import OrderedDict
-	import gdown
-	def _get_torch_home():
-		ENV_TORCH_HOME = "TORCH_HOME"
-		ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
-		DEFAULT_CACHE_DIR = "~/.cache"
-		torch_home = os.path.expanduser(
-			os.getenv(
-				ENV_TORCH_HOME,
-				os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"),
-			)
-		)
-		return torch_home
-	filename = key + "_imagenet.pth"
-	# Try ethology/models/ directory first
-	ethology_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../../"))
-	models_dir = os.path.join(ethology_root, "models")
-	os.makedirs(models_dir, exist_ok=True)
-	local_file = os.path.join(models_dir, filename)
-	torch_home = _get_torch_home()
-	model_dir = os.path.join(torch_home, "checkpoints")
-	os.makedirs(model_dir, exist_ok=True)
-	cached_file = os.path.join(model_dir, filename)
-	# Prefer ethology/models/ directory file if present
-	if os.path.exists(local_file):
-		print(f"[OSNet] Loading model weights from {local_file}")
-		cached_file = local_file
-	elif os.path.exists(cached_file):
-		print(f"[OSNet] Loading model weights from {cached_file}")
-	else:
-		print(f"[OSNet] Downloading model weights to {cached_file}")
-		gdown.download(pretrained_urls[key], cached_file, quiet=False)
-	state_dict = torch.load(cached_file)
-	model_dict = model.state_dict()
-	new_state_dict = OrderedDict()
-	matched_layers, discarded_layers = [], []
-	for k, v in state_dict.items():
-		if k.startswith("module."):
-			k = k[7:]
-		if k in model_dict and model_dict[k].size() == v.size():
-			new_state_dict[k] = v
-			matched_layers.append(k)
-		else:
-			discarded_layers.append(k)
-	model_dict.update(new_state_dict)
-	model.load_state_dict(model_dict)
-	if len(matched_layers) == 0:
-		warnings.warn(
-			'The pretrained weights from "{}" cannot be loaded, '
-			"please check the key names manually "
-			"(** ignored and continue **)".format(cached_file)
-		)
-	else:
-		print(
-			'Successfully loaded imagenet pretrained weights from "{}"'.format(
-				cached_file
-			)
-		)
-		if len(discarded_layers) > 0:
-			print(
-				"** The following layers are discarded "
-				"due to unmatched keys or layer size: {}".format(discarded_layers)
-			)
+    import os
+    from collections import OrderedDict
+
+    import gdown
+
+    def _get_torch_home():
+        ENV_TORCH_HOME = "TORCH_HOME"
+        ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
+        DEFAULT_CACHE_DIR = "~/.cache"
+        torch_home = os.path.expanduser(
+            os.getenv(
+                ENV_TORCH_HOME,
+                os.path.join(
+                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"
+                ),
+            )
+        )
+        return torch_home
+
+    filename = key + "_imagenet.pth"
+    # Try ethology/models/ directory first
+    ethology_root = os.path.abspath(
+        os.path.join(os.path.dirname(__file__), "../../../")
+    )
+    models_dir = os.path.join(ethology_root, "models")
+    os.makedirs(models_dir, exist_ok=True)
+    local_file = os.path.join(models_dir, filename)
+    torch_home = _get_torch_home()
+    model_dir = os.path.join(torch_home, "checkpoints")
+    os.makedirs(model_dir, exist_ok=True)
+    cached_file = os.path.join(model_dir, filename)
+    # Prefer ethology/models/ directory file if present
+    if os.path.exists(local_file):
+        print(f"[OSNet] Loading model weights from {local_file}")
+        cached_file = local_file
+    elif os.path.exists(cached_file):
+        print(f"[OSNet] Loading model weights from {cached_file}")
+    else:
+        print(f"[OSNet] Downloading model weights to {cached_file}")
+        gdown.download(pretrained_urls[key], cached_file, quiet=False)
+    state_dict = torch.load(cached_file)
+    model_dict = model.state_dict()
+    new_state_dict = OrderedDict()
+    matched_layers, discarded_layers = [], []
+    for k, v in state_dict.items():
+        if k.startswith("module."):
+            k = k[7:]
+        if k in model_dict and model_dict[k].size() == v.size():
+            new_state_dict[k] = v
+            matched_layers.append(k)
+        else:
+            discarded_layers.append(k)
+    model_dict.update(new_state_dict)
+    model.load_state_dict(model_dict)
+    if len(matched_layers) == 0:
+        warnings.warn(
+            f'The pretrained weights from "{cached_file}" cannot be loaded, '
+            "please check the key names manually "
+            "(** ignored and continue **)"
+        )
+    else:
+        print(
+            f'Successfully loaded imagenet pretrained weights from "{cached_file}"'
+        )
+        if len(discarded_layers) > 0:
+            print(
+                "** The following layers are discarded "
+                f"due to unmatched keys or layer size: {discarded_layers}"
+            )
+
 
 def osnet_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x1_0")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[64, 256, 384, 512],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x1_0")
+    return model
+
 
 def osnet_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[48, 192, 288, 384], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x0_75")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[48, 192, 288, 384],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x0_75")
+    return model
+
 
 def osnet_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[32, 128, 192, 256], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x0_5")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[32, 128, 192, 256],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x0_5")
+    return model
+
 
 def osnet_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[16, 64, 96, 128], loss=loss, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_x0_25")
-	return model
-
-def osnet_ibn_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[OSBlock, OSBlock, OSBlock], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ibn_x1_0")
-	return model
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[16, 64, 96, 128],
+        loss=loss,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_x0_25")
+    return model
+
+
+def osnet_ibn_x1_0(
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[OSBlock, OSBlock, OSBlock],
+        layers=[2, 2, 2],
+        channels=[64, 256, 384, 512],
+        loss=loss,
+        IN=True,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_ibn_x1_0")
+    return model
diff --git a/ethology/reid/backbones/osnet_ain.py b/ethology/reid/backbones/osnet_ain.py
index 9e052209..2ef3da25 100644
--- a/ethology/reid/backbones/osnet_ain.py
+++ b/ethology/reid/backbones/osnet_ain.py
@@ -1,6 +1,5 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-from __future__ import absolute_import, division
 
 import warnings
 
@@ -8,349 +7,541 @@
 from torch import nn
 from torch.nn import functional as F
 
-__all__ = ["osnet_ain_x1_0", "osnet_ain_x0_75", "osnet_ain_x0_5", "osnet_ain_x0_25"]
+__all__ = [
+    "osnet_ain_x1_0",
+    "osnet_ain_x0_75",
+    "osnet_ain_x0_5",
+    "osnet_ain_x0_25",
+]
 
 pretrained_urls = {
-	"osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
-	"osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
-	"osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
-	"osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
+    "osnet_ain_x1_0": "https://drive.google.com/uc?id=1-CaioD9NaqbHK_kzSMW8VE4_3KcsRjEo",
+    "osnet_ain_x0_75": "https://drive.google.com/uc?id=1apy0hpsMypqstfencdH-jKIUEFOW4xoM",
+    "osnet_ain_x0_5": "https://drive.google.com/uc?id=1KusKvEYyKGDTUBVRxRiz55G31wkihB6l",
+    "osnet_ain_x0_25": "https://drive.google.com/uc?id=1SxQt2AvmEcgWNhaRb2xC4rP6ZwVDP0Wt",
 }
 
 # ...existing code for ConvLayer, Conv1x1, Conv1x1Linear, Conv3x3, LightConv3x3, LightConvStream, ChannelGate, OSBlock, OSBlockINin, OSNet, init_pretrained_weights, and instantiation functions...
 
+
 class ConvLayer(nn.Module):
-	def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, groups=1, IN=False):
-		super(ConvLayer, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding, bias=False, groups=groups)
-		if IN:
-			self.bn = nn.InstanceNorm2d(out_channels, affine=True)
-		else:
-			self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(
+        self,
+        in_channels,
+        out_channels,
+        kernel_size,
+        stride=1,
+        padding=0,
+        groups=1,
+        IN=False,
+    ):
+        super(ConvLayer, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            kernel_size,
+            stride=stride,
+            padding=padding,
+            bias=False,
+            groups=groups,
+        )
+        if IN:
+            self.bn = nn.InstanceNorm2d(out_channels, affine=True)
+        else:
+            self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class Conv1x1(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv1x1, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv1x1, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            1,
+            stride=stride,
+            padding=0,
+            bias=False,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class Conv1x1Linear(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, bn=True):
-		super(Conv1x1Linear, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 1, stride=stride, padding=0, bias=False)
-		self.bn = None
-		if bn:
-			self.bn = nn.BatchNorm2d(out_channels)
-	def forward(self, x):
-		x = self.conv(x)
-		if self.bn is not None:
-			x = self.bn(x)
-		return x
+    def __init__(self, in_channels, out_channels, stride=1, bn=True):
+        super(Conv1x1Linear, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels, out_channels, 1, stride=stride, padding=0, bias=False
+        )
+        self.bn = None
+        if bn:
+            self.bn = nn.BatchNorm2d(out_channels)
+
+    def forward(self, x):
+        x = self.conv(x)
+        if self.bn is not None:
+            x = self.bn(x)
+        return x
+
 
 class Conv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels, stride=1, groups=1):
-		super(Conv3x3, self).__init__()
-		self.conv = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False, groups=groups)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(self, in_channels, out_channels, stride=1, groups=1):
+        super(Conv3x3, self).__init__()
+        self.conv = nn.Conv2d(
+            in_channels,
+            out_channels,
+            3,
+            stride=stride,
+            padding=1,
+            bias=False,
+            groups=groups,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.conv(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class LightConv3x3(nn.Module):
-	def __init__(self, in_channels, out_channels):
-		super(LightConv3x3, self).__init__()
-		self.conv1 = nn.Conv2d(in_channels, out_channels, 1, stride=1, padding=0, bias=False)
-		self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False, groups=out_channels)
-		self.bn = nn.BatchNorm2d(out_channels)
-		self.relu = nn.ReLU()
-	def forward(self, x):
-		x = self.conv1(x)
-		x = self.conv2(x)
-		x = self.bn(x)
-		return self.relu(x)
+    def __init__(self, in_channels, out_channels):
+        super(LightConv3x3, self).__init__()
+        self.conv1 = nn.Conv2d(
+            in_channels, out_channels, 1, stride=1, padding=0, bias=False
+        )
+        self.conv2 = nn.Conv2d(
+            out_channels,
+            out_channels,
+            3,
+            stride=1,
+            padding=1,
+            bias=False,
+            groups=out_channels,
+        )
+        self.bn = nn.BatchNorm2d(out_channels)
+        self.relu = nn.ReLU()
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.conv2(x)
+        x = self.bn(x)
+        return self.relu(x)
+
 
 class LightConvStream(nn.Module):
-	def __init__(self, in_channels, out_channels, depth):
-		super(LightConvStream, self).__init__()
-		assert depth >= 1
-		layers = [LightConv3x3(in_channels, out_channels)]
-		for i in range(depth - 1):
-			layers.append(LightConv3x3(out_channels, out_channels))
-		self.layers = nn.Sequential(*layers)
-	def forward(self, x):
-		return self.layers(x)
+    def __init__(self, in_channels, out_channels, depth):
+        super(LightConvStream, self).__init__()
+        assert depth >= 1
+        layers = [LightConv3x3(in_channels, out_channels)]
+        for i in range(depth - 1):
+            layers.append(LightConv3x3(out_channels, out_channels))
+        self.layers = nn.Sequential(*layers)
+
+    def forward(self, x):
+        return self.layers(x)
+
 
 class ChannelGate(nn.Module):
-	def __init__(self, in_channels, num_gates=None, return_gates=False, gate_activation="sigmoid", reduction=16, layer_norm=False):
-		super(ChannelGate, self).__init__()
-		if num_gates is None:
-			num_gates = in_channels
-		self.return_gates = return_gates
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc1 = nn.Conv2d(in_channels, in_channels // reduction, kernel_size=1, bias=True, padding=0)
-		self.norm1 = None
-		if layer_norm:
-			self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
-		self.relu = nn.ReLU()
-		self.fc2 = nn.Conv2d(in_channels // reduction, num_gates, kernel_size=1, bias=True, padding=0)
-		if gate_activation == "sigmoid":
-			self.gate_activation = nn.Sigmoid()
-		elif gate_activation == "relu":
-			self.gate_activation = nn.ReLU()
-		elif gate_activation == "linear":
-			self.gate_activation = None
-		else:
-			raise RuntimeError("Unknown gate activation: {}".format(gate_activation))
-	def forward(self, x):
-		input = x
-		x = self.global_avgpool(x)
-		x = self.fc1(x)
-		if self.norm1 is not None:
-			x = self.norm1(x)
-		x = self.relu(x)
-		x = self.fc2(x)
-		if self.gate_activation is not None:
-			x = self.gate_activation(x)
-		if self.return_gates:
-			return x
-		return input * x
+    def __init__(
+        self,
+        in_channels,
+        num_gates=None,
+        return_gates=False,
+        gate_activation="sigmoid",
+        reduction=16,
+        layer_norm=False,
+    ):
+        super(ChannelGate, self).__init__()
+        if num_gates is None:
+            num_gates = in_channels
+        self.return_gates = return_gates
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc1 = nn.Conv2d(
+            in_channels,
+            in_channels // reduction,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        self.norm1 = None
+        if layer_norm:
+            self.norm1 = nn.LayerNorm((in_channels // reduction, 1, 1))
+        self.relu = nn.ReLU()
+        self.fc2 = nn.Conv2d(
+            in_channels // reduction,
+            num_gates,
+            kernel_size=1,
+            bias=True,
+            padding=0,
+        )
+        if gate_activation == "sigmoid":
+            self.gate_activation = nn.Sigmoid()
+        elif gate_activation == "relu":
+            self.gate_activation = nn.ReLU()
+        elif gate_activation == "linear":
+            self.gate_activation = None
+        else:
+            raise RuntimeError(f"Unknown gate activation: {gate_activation}")
+
+    def forward(self, x):
+        input = x
+        x = self.global_avgpool(x)
+        x = self.fc1(x)
+        if self.norm1 is not None:
+            x = self.norm1(x)
+        x = self.relu(x)
+        x = self.fc2(x)
+        if self.gate_activation is not None:
+            x = self.gate_activation(x)
+        if self.return_gates:
+            return x
+        return input * x
+
 
 class OSBlock(nn.Module):
-	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
-		super(OSBlock, self).__init__()
-		assert T >= 1
-		assert out_channels >= reduction and out_channels % reduction == 0
-		mid_channels = out_channels // reduction
-		self.conv1 = Conv1x1(in_channels, mid_channels)
-		self.conv2 = nn.ModuleList([LightConvStream(mid_channels, mid_channels, t) for t in range(1, T + 1)])
-		self.gate = ChannelGate(mid_channels)
-		self.conv3 = Conv1x1Linear(mid_channels, out_channels)
-		self.downsample = None
-		if in_channels != out_channels:
-			self.downsample = Conv1x1Linear(in_channels, out_channels)
-	def forward(self, x):
-		identity = x
-		x1 = self.conv1(x)
-		x2 = 0
-		for conv2_t in self.conv2:
-			x2_t = conv2_t(x1)
-			x2 = x2 + self.gate(x2_t)
-		x3 = self.conv3(x2)
-		if self.downsample is not None:
-			identity = self.downsample(identity)
-		out = x3 + identity
-		return F.relu(out)
+    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+        super(OSBlock, self).__init__()
+        assert T >= 1
+        assert out_channels >= reduction and out_channels % reduction == 0
+        mid_channels = out_channels // reduction
+        self.conv1 = Conv1x1(in_channels, mid_channels)
+        self.conv2 = nn.ModuleList(
+            [
+                LightConvStream(mid_channels, mid_channels, t)
+                for t in range(1, T + 1)
+            ]
+        )
+        self.gate = ChannelGate(mid_channels)
+        self.conv3 = Conv1x1Linear(mid_channels, out_channels)
+        self.downsample = None
+        if in_channels != out_channels:
+            self.downsample = Conv1x1Linear(in_channels, out_channels)
+
+    def forward(self, x):
+        identity = x
+        x1 = self.conv1(x)
+        x2 = 0
+        for conv2_t in self.conv2:
+            x2_t = conv2_t(x1)
+            x2 = x2 + self.gate(x2_t)
+        x3 = self.conv3(x2)
+        if self.downsample is not None:
+            identity = self.downsample(identity)
+        out = x3 + identity
+        return F.relu(out)
+
 
 class OSBlockINin(nn.Module):
-	def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
-		super(OSBlockINin, self).__init__()
-		assert T >= 1
-		assert out_channels >= reduction and out_channels % reduction == 0
-		mid_channels = out_channels // reduction
-		self.conv1 = Conv1x1(in_channels, mid_channels)
-		self.conv2 = nn.ModuleList([LightConvStream(mid_channels, mid_channels, t) for t in range(1, T + 1)])
-		self.gate = ChannelGate(mid_channels)
-		self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
-		self.downsample = None
-		if in_channels != out_channels:
-			self.downsample = Conv1x1Linear(in_channels, out_channels)
-		self.IN = nn.InstanceNorm2d(out_channels, affine=True)
-	def forward(self, x):
-		identity = x
-		x1 = self.conv1(x)
-		x2 = 0
-		for conv2_t in self.conv2:
-			x2_t = conv2_t(x1)
-			x2 = x2 + self.gate(x2_t)
-		x3 = self.conv3(x2)
-		x3 = self.IN(x3)
-		if self.downsample is not None:
-			identity = self.downsample(identity)
-		out = x3 + identity
-		return F.relu(out)
+    def __init__(self, in_channels, out_channels, reduction=4, T=4, **kwargs):
+        super(OSBlockINin, self).__init__()
+        assert T >= 1
+        assert out_channels >= reduction and out_channels % reduction == 0
+        mid_channels = out_channels // reduction
+        self.conv1 = Conv1x1(in_channels, mid_channels)
+        self.conv2 = nn.ModuleList(
+            [
+                LightConvStream(mid_channels, mid_channels, t)
+                for t in range(1, T + 1)
+            ]
+        )
+        self.gate = ChannelGate(mid_channels)
+        self.conv3 = Conv1x1Linear(mid_channels, out_channels, bn=False)
+        self.downsample = None
+        if in_channels != out_channels:
+            self.downsample = Conv1x1Linear(in_channels, out_channels)
+        self.IN = nn.InstanceNorm2d(out_channels, affine=True)
+
+    def forward(self, x):
+        identity = x
+        x1 = self.conv1(x)
+        x2 = 0
+        for conv2_t in self.conv2:
+            x2_t = conv2_t(x1)
+            x2 = x2 + self.gate(x2_t)
+        x3 = self.conv3(x2)
+        x3 = self.IN(x3)
+        if self.downsample is not None:
+            identity = self.downsample(identity)
+        out = x3 + identity
+        return F.relu(out)
+
 
 class OSNet(nn.Module):
-	def __init__(self, num_classes, blocks, layers, channels, feature_dim=512, loss="softmax", conv1_IN=False, **kwargs):
-		super(OSNet, self).__init__()
-		num_blocks = len(blocks)
-		assert num_blocks == len(layers)
-		assert num_blocks == len(channels) - 1
-		self.loss = loss
-		self.feature_dim = feature_dim
-		self.conv1 = ConvLayer(3, channels[0], 7, stride=2, padding=3, IN=conv1_IN)
-		self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
-		self.conv2 = self._make_layer(blocks[0], layers[0], channels[0], channels[1])
-		self.pool2 = nn.Sequential(Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2))
-		self.conv3 = self._make_layer(blocks[1], layers[1], channels[1], channels[2])
-		self.pool3 = nn.Sequential(Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2))
-		self.conv4 = self._make_layer(blocks[2], layers[2], channels[2], channels[3])
-		self.conv5 = Conv1x1(channels[3], channels[3])
-		self.global_avgpool = nn.AdaptiveAvgPool2d(1)
-		self.fc = self._construct_fc_layer(self.feature_dim, channels[3], dropout_p=None)
-		self.classifier = nn.Linear(self.feature_dim, num_classes)
-		self._init_params()
-	def _make_layer(self, blocks, layer, in_channels, out_channels):
-		layers = []
-		layers += [blocks[0](in_channels, out_channels)]
-		for i in range(1, len(blocks)):
-			layers += [blocks[i](out_channels, out_channels)]
-		return nn.Sequential(*layers)
-	def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
-		if fc_dims is None or fc_dims < 0:
-			self.feature_dim = input_dim
-			return None
-		if isinstance(fc_dims, int):
-			fc_dims = [fc_dims]
-		layers = []
-		for dim in fc_dims:
-			layers.append(nn.Linear(input_dim, dim))
-			layers.append(nn.BatchNorm1d(dim))
-			layers.append(nn.ReLU())
-			if dropout_p is not None:
-				layers.append(nn.Dropout(p=dropout_p))
-			input_dim = dim
-		self.feature_dim = fc_dims[-1]
-		return nn.Sequential(*layers)
-	def _init_params(self):
-		for m in self.modules():
-			if isinstance(m, nn.Conv2d):
-				nn.init.kaiming_normal_(m.weight, mode="fan_out", nonlinearity="relu")
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.BatchNorm1d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.InstanceNorm2d):
-				nn.init.constant_(m.weight, 1)
-				nn.init.constant_(m.bias, 0)
-			elif isinstance(m, nn.Linear):
-				nn.init.normal_(m.weight, 0, 0.01)
-				if m.bias is not None:
-					nn.init.constant_(m.bias, 0)
-	def featuremaps(self, x):
-		x = self.conv1(x)
-		x = self.maxpool(x)
-		x = self.conv2(x)
-		x = self.pool2(x)
-		x = self.conv3(x)
-		x = self.pool3(x)
-		x = self.conv4(x)
-		x = self.conv5(x)
-		return x
-	def forward(self, x, return_featuremaps=False):
-		x = self.featuremaps(x)
-		if return_featuremaps:
-			return x
-		v = self.global_avgpool(x)
-		v = v.view(v.size(0), -1)
-		if self.fc is not None:
-			v = self.fc(v)
-		if not self.training:
-			return v
-		y = self.classifier(v)
-		if self.loss == "softmax":
-			return y
-		elif self.loss == "triplet":
-			return y, v
-		else:
-			raise KeyError("Unsupported loss: {}".format(self.loss))
+    def __init__(
+        self,
+        num_classes,
+        blocks,
+        layers,
+        channels,
+        feature_dim=512,
+        loss="softmax",
+        conv1_IN=False,
+        **kwargs,
+    ):
+        super(OSNet, self).__init__()
+        num_blocks = len(blocks)
+        assert num_blocks == len(layers)
+        assert num_blocks == len(channels) - 1
+        self.loss = loss
+        self.feature_dim = feature_dim
+        self.conv1 = ConvLayer(
+            3, channels[0], 7, stride=2, padding=3, IN=conv1_IN
+        )
+        self.maxpool = nn.MaxPool2d(3, stride=2, padding=1)
+        self.conv2 = self._make_layer(
+            blocks[0], layers[0], channels[0], channels[1]
+        )
+        self.pool2 = nn.Sequential(
+            Conv1x1(channels[1], channels[1]), nn.AvgPool2d(2, stride=2)
+        )
+        self.conv3 = self._make_layer(
+            blocks[1], layers[1], channels[1], channels[2]
+        )
+        self.pool3 = nn.Sequential(
+            Conv1x1(channels[2], channels[2]), nn.AvgPool2d(2, stride=2)
+        )
+        self.conv4 = self._make_layer(
+            blocks[2], layers[2], channels[2], channels[3]
+        )
+        self.conv5 = Conv1x1(channels[3], channels[3])
+        self.global_avgpool = nn.AdaptiveAvgPool2d(1)
+        self.fc = self._construct_fc_layer(
+            self.feature_dim, channels[3], dropout_p=None
+        )
+        self.classifier = nn.Linear(self.feature_dim, num_classes)
+        self._init_params()
+
+    def _make_layer(self, blocks, layer, in_channels, out_channels):
+        layers = []
+        layers += [blocks[0](in_channels, out_channels)]
+        for i in range(1, len(blocks)):
+            layers += [blocks[i](out_channels, out_channels)]
+        return nn.Sequential(*layers)
+
+    def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
+        if fc_dims is None or fc_dims < 0:
+            self.feature_dim = input_dim
+            return None
+        if isinstance(fc_dims, int):
+            fc_dims = [fc_dims]
+        layers = []
+        for dim in fc_dims:
+            layers.append(nn.Linear(input_dim, dim))
+            layers.append(nn.BatchNorm1d(dim))
+            layers.append(nn.ReLU())
+            if dropout_p is not None:
+                layers.append(nn.Dropout(p=dropout_p))
+            input_dim = dim
+        self.feature_dim = fc_dims[-1]
+        return nn.Sequential(*layers)
+
+    def _init_params(self):
+        for m in self.modules():
+            if isinstance(m, nn.Conv2d):
+                nn.init.kaiming_normal_(
+                    m.weight, mode="fan_out", nonlinearity="relu"
+                )
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+            elif (
+                isinstance(m, nn.BatchNorm2d)
+                or isinstance(m, nn.BatchNorm1d)
+                or isinstance(m, nn.InstanceNorm2d)
+            ):
+                nn.init.constant_(m.weight, 1)
+                nn.init.constant_(m.bias, 0)
+            elif isinstance(m, nn.Linear):
+                nn.init.normal_(m.weight, 0, 0.01)
+                if m.bias is not None:
+                    nn.init.constant_(m.bias, 0)
+
+    def featuremaps(self, x):
+        x = self.conv1(x)
+        x = self.maxpool(x)
+        x = self.conv2(x)
+        x = self.pool2(x)
+        x = self.conv3(x)
+        x = self.pool3(x)
+        x = self.conv4(x)
+        x = self.conv5(x)
+        return x
+
+    def forward(self, x, return_featuremaps=False):
+        x = self.featuremaps(x)
+        if return_featuremaps:
+            return x
+        v = self.global_avgpool(x)
+        v = v.view(v.size(0), -1)
+        if self.fc is not None:
+            v = self.fc(v)
+        if not self.training:
+            return v
+        y = self.classifier(v)
+        if self.loss == "softmax":
+            return y
+        elif self.loss == "triplet":
+            return y, v
+        else:
+            raise KeyError(f"Unsupported loss: {self.loss}")
+
 
 def init_pretrained_weights(model, key=""):
-	import errno
-	import os
-	from collections import OrderedDict
-	import gdown
-	def _get_torch_home():
-		ENV_TORCH_HOME = "TORCH_HOME"
-		ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
-		DEFAULT_CACHE_DIR = "~/.cache"
-		torch_home = os.path.expanduser(
-			os.getenv(
-				ENV_TORCH_HOME,
-				os.path.join(os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"),
-			)
-		)
-		return torch_home
-	torch_home = _get_torch_home()
-	model_dir = os.path.join(torch_home, "checkpoints")
-	try:
-		os.makedirs(model_dir)
-	except OSError as e:
-		if e.errno == errno.EEXIST:
-			pass
-		else:
-			raise
-	filename = key + "_imagenet.pth"
-	cached_file = os.path.join(model_dir, filename)
-	if not os.path.exists(cached_file):
-		gdown.download(pretrained_urls[key], cached_file, quiet=False)
-	state_dict = torch.load(cached_file)
-	model_dict = model.state_dict()
-	new_state_dict = OrderedDict()
-	matched_layers, discarded_layers = [], []
-	for k, v in state_dict.items():
-		if k.startswith("module."):
-			k = k[7:]
-		if k in model_dict and model_dict[k].size() == v.size():
-			new_state_dict[k] = v
-			matched_layers.append(k)
-		else:
-			discarded_layers.append(k)
-	model_dict.update(new_state_dict)
-	model.load_state_dict(model_dict)
-	if len(matched_layers) == 0:
-		warnings.warn(
-			'The pretrained weights from "{}" cannot be loaded, '
-			"please check the key names manually "
-			"(** ignored and continue **)".format(cached_file)
-		)
-	else:
-		print(
-			'Successfully loaded imagenet pretrained weights from "{}"'.format(
-				cached_file
-			)
-		)
-		if len(discarded_layers) > 0:
-			print(
-				"** The following layers are discarded "
-				"due to unmatched keys or layer size: {}".format(discarded_layers)
-			)
-
-def osnet_ain_x1_0(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[64, 256, 384, 512], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x1_0")
-	return model
-
-def osnet_ain_x0_75(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[48, 192, 288, 384], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x0_75")
-	return model
-
-def osnet_ain_x0_5(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[32, 128, 192, 256], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x0_5")
-	return model
-
-def osnet_ain_x0_25(num_classes=1000, pretrained=True, loss="softmax", **kwargs):
-	model = OSNet(num_classes, blocks=[[OSBlockINin, OSBlockINin], [OSBlock, OSBlockINin], [OSBlockINin, OSBlock]], layers=[2, 2, 2], channels=[16, 64, 96, 128], loss=loss, conv1_IN=True, **kwargs)
-	if pretrained:
-		init_pretrained_weights(model, key="osnet_ain_x0_25")
-	return model
+    import errno
+    import os
+    from collections import OrderedDict
+
+    import gdown
+
+    def _get_torch_home():
+        ENV_TORCH_HOME = "TORCH_HOME"
+        ENV_XDG_CACHE_HOME = "XDG_CACHE_HOME"
+        DEFAULT_CACHE_DIR = "~/.cache"
+        torch_home = os.path.expanduser(
+            os.getenv(
+                ENV_TORCH_HOME,
+                os.path.join(
+                    os.getenv(ENV_XDG_CACHE_HOME, DEFAULT_CACHE_DIR), "torch"
+                ),
+            )
+        )
+        return torch_home
+
+    torch_home = _get_torch_home()
+    model_dir = os.path.join(torch_home, "checkpoints")
+    try:
+        os.makedirs(model_dir)
+    except OSError as e:
+        if e.errno == errno.EEXIST:
+            pass
+        else:
+            raise
+    filename = key + "_imagenet.pth"
+    cached_file = os.path.join(model_dir, filename)
+    if not os.path.exists(cached_file):
+        gdown.download(pretrained_urls[key], cached_file, quiet=False)
+    state_dict = torch.load(cached_file)
+    model_dict = model.state_dict()
+    new_state_dict = OrderedDict()
+    matched_layers, discarded_layers = [], []
+    for k, v in state_dict.items():
+        if k.startswith("module."):
+            k = k[7:]
+        if k in model_dict and model_dict[k].size() == v.size():
+            new_state_dict[k] = v
+            matched_layers.append(k)
+        else:
+            discarded_layers.append(k)
+    model_dict.update(new_state_dict)
+    model.load_state_dict(model_dict)
+    if len(matched_layers) == 0:
+        warnings.warn(
+            f'The pretrained weights from "{cached_file}" cannot be loaded, '
+            "please check the key names manually "
+            "(** ignored and continue **)"
+        )
+    else:
+        print(
+            f'Successfully loaded imagenet pretrained weights from "{cached_file}"'
+        )
+        if len(discarded_layers) > 0:
+            print(
+                "** The following layers are discarded "
+                f"due to unmatched keys or layer size: {discarded_layers}"
+            )
+
+
+def osnet_ain_x1_0(
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],
+            [OSBlock, OSBlockINin],
+            [OSBlockINin, OSBlock],
+        ],
+        layers=[2, 2, 2],
+        channels=[64, 256, 384, 512],
+        loss=loss,
+        conv1_IN=True,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_ain_x1_0")
+    return model
+
+
+def osnet_ain_x0_75(
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],
+            [OSBlock, OSBlockINin],
+            [OSBlockINin, OSBlock],
+        ],
+        layers=[2, 2, 2],
+        channels=[48, 192, 288, 384],
+        loss=loss,
+        conv1_IN=True,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_ain_x0_75")
+    return model
+
+
+def osnet_ain_x0_5(
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],
+            [OSBlock, OSBlockINin],
+            [OSBlockINin, OSBlock],
+        ],
+        layers=[2, 2, 2],
+        channels=[32, 128, 192, 256],
+        loss=loss,
+        conv1_IN=True,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_ain_x0_5")
+    return model
+
+
+def osnet_ain_x0_25(
+    num_classes=1000, pretrained=True, loss="softmax", **kwargs
+):
+    model = OSNet(
+        num_classes,
+        blocks=[
+            [OSBlockINin, OSBlockINin],
+            [OSBlock, OSBlockINin],
+            [OSBlockINin, OSBlock],
+        ],
+        layers=[2, 2, 2],
+        channels=[16, 64, 96, 128],
+        loss=loss,
+        conv1_IN=True,
+        **kwargs,
+    )
+    if pretrained:
+        init_pretrained_weights(model, key="osnet_ain_x0_25")
+    return model
diff --git a/ethology/reid/backends/base_backend.py b/ethology/reid/backends/base_backend.py
index 688ec43a..0edb2826 100644
--- a/ethology/reid/backends/base_backend.py
+++ b/ethology/reid/backends/base_backend.py
@@ -1,18 +1,19 @@
-
-import os
 from abc import abstractmethod
 from pathlib import Path
+
 import cv2
 import gdown
 import numpy as np
 import torch
 from filelock import SoftFileLock
+
 from ethology.reid.core.registry import ReIDModelRegistry
+
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 # from ethology.utils.checks import RequirementsChecker  # If needed, implement or set RequirementsChecker
 
-class BaseModelBackend:
 
+class BaseModelBackend:
     def __init__(self, weights, device, half):
         self.weights = weights[0] if isinstance(weights, list) else weights
         if isinstance(self.weights, str):
@@ -22,7 +23,7 @@ def __init__(self, weights, device, half):
         self.half = half
         self.model = None
         # Support both string and torch.device for device
-        if hasattr(self.device, 'type'):
+        if hasattr(self.device, "type"):
             self.cuda = torch.cuda.is_available() and self.device.type != "cpu"
         else:
             self.cuda = torch.cuda.is_available() and self.device != "cpu"
@@ -41,11 +42,19 @@ def __init__(self, weights, device, half):
 
         self.load_model(self.weights)
 
-        self.mean_array = torch.tensor([0.485, 0.456, 0.406], device=self.device).view(1, 3, 1, 1)
-        self.std_array = torch.tensor([0.229, 0.224, 0.225], device=self.device).view(1, 3, 1, 1)
+        self.mean_array = torch.tensor(
+            [0.485, 0.456, 0.406], device=self.device
+        ).view(1, 3, 1, 1)
+        self.std_array = torch.tensor(
+            [0.229, 0.224, 0.225], device=self.device
+        ).view(1, 3, 1, 1)
         if "clip" in self.model_name:
-            self.mean_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
-            self.std_array = torch.tensor([0.5, 0.5, 0.5], device=self.device).view(1, 3, 1, 1)
+            self.mean_array = torch.tensor(
+                [0.5, 0.5, 0.5], device=self.device
+            ).view(1, 3, 1, 1)
+            self.std_array = torch.tensor(
+                [0.5, 0.5, 0.5], device=self.device
+            ).view(1, 3, 1, 1)
 
         if "vehicleid" in self.weights.name or "veri" in self.weights.name:
             input_shape = (256, 256)
@@ -57,7 +66,6 @@ def __init__(self, weights, device, half):
             input_shape = (256, 128)
         self.input_shape = input_shape
 
-
     def get_crops(self, xyxys, img):
         h, w = img.shape[:2]
         interpolation_method = cv2.INTER_LINEAR
@@ -85,7 +93,6 @@ def get_crops(self, xyxys, img):
         crops = (crops - self.mean_array) / self.std_array
         return crops
 
-
     @torch.no_grad()
     def get_features(self, xyxys, img):
         if xyxys.size != 0:
@@ -98,7 +105,6 @@ def get_features(self, xyxys, img):
         features = features / np.linalg.norm(features, axis=-1, keepdims=True)
         return features
 
-
     def warmup(self, imgsz=[(256, 128, 3)]):
         if self.device.type != "cpu":
             im = np.random.randint(0, 255, *imgsz, dtype=np.uint8)
@@ -108,11 +114,9 @@ def warmup(self, imgsz=[(256, 128, 3)]):
             crops = self.inference_preprocess(crops)
             self.forward(crops)
 
-
     def to_numpy(self, x):
         return x.cpu().numpy() if isinstance(x, torch.Tensor) else x
 
-
     def inference_preprocess(self, x):
         if self.half:
             if isinstance(x, torch.Tensor):
@@ -121,32 +125,34 @@ def inference_preprocess(self, x):
             elif isinstance(x, np.ndarray):
                 if x.dtype != np.float16:
                     x = x.astype(np.float16)
-        if hasattr(self, 'nhwc') and self.nhwc:
+        if hasattr(self, "nhwc") and self.nhwc:
             if isinstance(x, torch.Tensor):
                 x = x.permute(0, 2, 3, 1)
             elif isinstance(x, np.ndarray):
                 x = np.transpose(x, (0, 2, 3, 1))
         return x
 
-
     def inference_postprocess(self, features):
         if isinstance(features, (list, tuple)):
             return (
-                self.to_numpy(features[0]) if len(features) == 1 else [self.to_numpy(x) for x in features]
+                self.to_numpy(features[0])
+                if len(features) == 1
+                else [self.to_numpy(x) for x in features]
             )
         else:
             return self.to_numpy(features)
 
-
     @abstractmethod
     def forward(self, im_batch):
-        raise NotImplementedError("This method should be implemented by subclasses.")
-
+        raise NotImplementedError(
+            "This method should be implemented by subclasses."
+        )
 
     @abstractmethod
     def load_model(self, w):
-        raise NotImplementedError("This method should be implemented by subclasses.")
-
+        raise NotImplementedError(
+            "This method should be implemented by subclasses."
+        )
 
     def download_model(self, w):
         if isinstance(w, str):
diff --git a/ethology/reid/backends/onnx_backend.py b/ethology/reid/backends/onnx_backend.py
index c7c93017..41aefb7f 100644
--- a/ethology/reid/backends/onnx_backend.py
+++ b/ethology/reid/backends/onnx_backend.py
@@ -1,31 +1,34 @@
-
 from ethology.reid.backends.base_backend import BaseModelBackend
 
+
 class ONNXBackend(BaseModelBackend):
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
+
+    def load_model(self, w):
+        # ONNXRuntime will attempt to use the first provider, and if it fails or is not
+        # available for some reason, it will fall back to the next provider in the list
+        if self.device.type == "mps":
+            # self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
+            providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
+        elif self.device.type == "cuda":
+            # self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
+            providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
+        else:
+            # self.checker.check_packages(("onnxruntime==1.18.1",))
+            providers = ["CPUExecutionProvider"]
+        import onnxruntime
 
-	def load_model(self, w):
-		# ONNXRuntime will attempt to use the first provider, and if it fails or is not
-		# available for some reason, it will fall back to the next provider in the list
-		if self.device.type == "mps":
-			# self.checker.check_packages(("onnxruntime-silicon==1.18.1",))
-			providers = ["MPSExecutionProvider", "CPUExecutionProvider"]
-		elif self.device.type == "cuda":
-			# self.checker.check_packages(("onnxruntime-gpu==1.18.1",))
-			providers = ["CUDAExecutionProvider", "CPUExecutionProvider"]
-		else:
-			# self.checker.check_packages(("onnxruntime==1.18.1",))
-			providers = ["CPUExecutionProvider"]
-		import onnxruntime
-		self.session = onnxruntime.InferenceSession(str(w), providers=providers)
+        self.session = onnxruntime.InferenceSession(
+            str(w), providers=providers
+        )
 
-	def forward(self, im_batch):
-		im_batch = im_batch.cpu().numpy()
-		features = self.session.run(
-			[self.session.get_outputs()[0].name],
-			{self.session.get_inputs()[0].name: im_batch},
-		)[0]
-		return features
+    def forward(self, im_batch):
+        im_batch = im_batch.cpu().numpy()
+        features = self.session.run(
+            [self.session.get_outputs()[0].name],
+            {self.session.get_inputs()[0].name: im_batch},
+        )[0]
+        return features
diff --git a/ethology/reid/backends/openvino_backend.py b/ethology/reid/backends/openvino_backend.py
index f06392bf..0c56a06e 100644
--- a/ethology/reid/backends/openvino_backend.py
+++ b/ethology/reid/backends/openvino_backend.py
@@ -1,48 +1,49 @@
 from pathlib import Path
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+
 # Note: LOGGER can be replaced with print or a local logger if needed
 
+
 class OpenVinoBackend(BaseModelBackend):
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
+
+    def load_model(self, w):
+        # self.checker.check_packages(("openvino>=2025.2.0",))
+
+        print(f"Loading {w} for OpenVINO inference...")
+        try:
+            # requires openvino-dev: https://pypi.org/project/openvino-dev/
+            from openvino import Core, Layout
+        except ImportError:
+            print(
+                f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
+                "requires openvino pip package to be installed!\n"
+                "$ pip install openvino>=2025.2.0\n"
+            )
+            raise
+        ie = Core()
+        w = Path(w)
+        print(w)
+        if w.suffix == ".bin":
+            w = w.with_suffix(".xml")
+
+        if not w.is_file():  # if not *.xml
+            w = next(
+                Path(w).glob("*.xml")
+            )  # get *.xml file from *_openvino_model dir
+        network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
+        if network.get_parameters()[0].get_layout().empty:
+            network.get_parameters()[0].set_layout(Layout("NCWH"))
+        self.executable_network = ie.compile_model(
+            network, device_name="CPU"
+        )  # device_name="MYRIAD" for Intel NCS2
+        self.output_layer = next(iter(self.executable_network.outputs))
 
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
-
-	def load_model(self, w):
-		# self.checker.check_packages(("openvino>=2025.2.0",))
-
-		print(f"Loading {w} for OpenVINO inference...")
-		try:
-			# requires openvino-dev: https://pypi.org/project/openvino-dev/
-			from openvino import Core, Layout
-		except ImportError:
-			print(
-				f"Running {self.__class__} with the specified OpenVINO weights\n{w.name}\n"
-				"requires openvino pip package to be installed!\n"
-				"$ pip install openvino>=2025.2.0\n"
-			)
-			raise
-		ie = Core()
-		w = Path(w)
-		print(w)
-		if w.suffix == '.bin':
-			w = w.with_suffix('.xml')
-
-		if not w.is_file():  # if not *.xml
-			w = next(
-				Path(w).glob("*.xml")
-			)  # get *.xml file from *_openvino_model dir
-		network = ie.read_model(model=w, weights=Path(w).with_suffix(".bin"))
-		if network.get_parameters()[0].get_layout().empty:
-			network.get_parameters()[0].set_layout(Layout("NCWH"))
-		self.executable_network = ie.compile_model(
-			network, device_name="CPU"
-		)  # device_name="MYRIAD" for Intel NCS2
-		self.output_layer = next(iter(self.executable_network.outputs))
-
-	def forward(self, im_batch):
-		im_batch = im_batch.cpu().numpy()  # FP32
-		features = self.executable_network([im_batch])[self.output_layer]
-		return features
+    def forward(self, im_batch):
+        im_batch = im_batch.cpu().numpy()  # FP32
+        features = self.executable_network([im_batch])[self.output_layer]
+        return features
diff --git a/ethology/reid/backends/pytorch_backend.py b/ethology/reid/backends/pytorch_backend.py
index d3dbfa06..2e859cc8 100644
--- a/ethology/reid/backends/pytorch_backend.py
+++ b/ethology/reid/backends/pytorch_backend.py
@@ -1,20 +1,20 @@
 from ethology.reid.backends.base_backend import BaseModelBackend
 from ethology.reid.core.registry import ReIDModelRegistry
 
-class PyTorchBackend(BaseModelBackend):
 
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
+class PyTorchBackend(BaseModelBackend):
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
 
-	def load_model(self, w):
-		# Load a PyTorch model
-		if w and w.is_file():
-			ReIDModelRegistry.load_pretrained_weights(self.model, w)
-		self.model.to(self.device).eval()
-		self.model.half() if self.half else self.model.float()
+    def load_model(self, w):
+        # Load a PyTorch model
+        if w and w.is_file():
+            ReIDModelRegistry.load_pretrained_weights(self.model, w)
+        self.model.to(self.device).eval()
+        self.model.half() if self.half else self.model.float()
 
-	def forward(self, im_batch):
-		features = self.model(im_batch)
-		return features
+    def forward(self, im_batch):
+        features = self.model(im_batch)
+        return features
diff --git a/ethology/reid/backends/tensorrt_backend.py b/ethology/reid/backends/tensorrt_backend.py
index 8dd7d7ee..4f6e95b0 100644
--- a/ethology/reid/backends/tensorrt_backend.py
+++ b/ethology/reid/backends/tensorrt_backend.py
@@ -1,310 +1,400 @@
+# Note: LOGGER can be replaced with print or a local logger if needed
+import os
 from collections import OrderedDict, namedtuple
 
 import numpy as np
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
-# Note: LOGGER can be replaced with print or a local logger if needed
-
-import os
-import sys
-import torch
-import numpy as np
-from collections import namedtuple, OrderedDict
-
-
 
 Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
 
 
 class TensorRTBackend(BaseModelBackend):
-	def __init__(self, engine_path, device=None):
-		import hashlib
-		import requests
-		self.device = device or (torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu"))
-		self.fp16 = False
-		self.model_ = None
-		self.context = None
-		self.bindings = None
-		self.binding_addrs = None
-		self.is_trt10 = False
-		# Download engine if engine_path is a URL
-		if engine_path.startswith("http://") or engine_path.startswith("https://"):
-			# Use a hash of the URL for filename
-			engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
-			filename = f"trt_engine_{engine_hash}.engine"
-			cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
-			os.makedirs(cache_dir, exist_ok=True)
-			cached_file = os.path.join(cache_dir, filename)
-			if not os.path.exists(cached_file):
-				print(f"[TensorRT] Downloading engine from {engine_path} to {cached_file}")
-				with requests.get(engine_path, stream=True) as r:
-					r.raise_for_status()
-					with open(cached_file, 'wb') as f:
-						for chunk in r.iter_content(chunk_size=8192):
-							f.write(chunk)
-			else:
-				print(f"[TensorRT] Using cached engine at {cached_file}")
-			self.engine_path = cached_file
-		else:
-			self.engine_path = engine_path
-		self.load_model(self.engine_path)
-
-	def load_model(self, w):
-		print(f"Loading {w} for TensorRT inference...")
-		try:
-			import tensorrt as trt
-			import pycuda.driver as cuda
-			import pycuda.autoinit  # noqa: F401
-		except ImportError:
-			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH.")
-
-		if self.device.type == "cpu":
-			if torch.cuda.is_available():
-				self.device = torch.device("cuda:0")
-			else:
-				raise ValueError("CUDA device not available for TensorRT inference.")
-
-		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
-		logger = trt.Logger(trt.Logger.INFO)
-
-		# Deserialize the engine
-		with open(w, "rb") as f:
-			with trt.Runtime(logger) as runtime:
-				self.model_ = runtime.deserialize_cuda_engine(f.read())
-
-		# Execution context
-		self.context = self.model_.create_execution_context()
-		self.bindings = OrderedDict()
-
-		self.is_trt10 = not hasattr(self.model_, "num_bindings")
-		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
-
-		# Parse bindings
-		for index in num:
-			if self.is_trt10:
-				name = self.model_.get_tensor_name(index)
-				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
-					self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_tensor_shape(name))
-
-			else:
-				name = self.model_.get_binding_name(index)
-				dtype = trt.nptype(self.model_.get_binding_dtype(index))
-				is_input = self.model_.binding_is_input(index)
-
-				# Handle dynamic shapes
-				if is_input and -1 in self.model_.get_binding_shape(index):
-					profile_index = 0
-					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
-					self.context.set_binding_shape(index, opt_shape)
-
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_binding_shape(index))
-			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
-			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-
-		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
-
-		# Execution context
-		self.context = self.model_.create_execution_context()
-		self.bindings = OrderedDict()
-
-		self.is_trt10 = not hasattr(self.model_, "num_bindings")
-		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
-
-		# Parse bindings
-		for index in num:
-			if self.is_trt10:
-				name = self.model_.get_tensor_name(index)
-				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
-						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_tensor_shape(name))
-
-			else:
-				name = self.model_.get_binding_name(index)
-				dtype = trt.nptype(self.model_.get_binding_dtype(index))
-				is_input = self.model_.binding_is_input(index)
-
-				# Handle dynamic shapes
-				if is_input and -1 in self.model_.get_binding_shape(index):
-					profile_index = 0
-					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
-					self.context.set_binding_shape(index, opt_shape)
-
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_binding_shape(index))
-			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
-			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-
-		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
-
-	def forward(self, im_batch):
-		temp_im_batch = im_batch.clone()
-		batch_array = []
-		inp_batch = im_batch.shape[0]
-		out_batch = self.bindings["output"].shape[0]
-		resultant_features = []
-
-		# Divide batch to sub batches
-		while inp_batch > out_batch:
-			batch_array.append(temp_im_batch[:out_batch])
-			temp_im_batch = temp_im_batch[out_batch:]
-			inp_batch = temp_im_batch.shape[0]
-		if temp_im_batch.shape[0] > 0:
-			batch_array.append(temp_im_batch)
-
-		for temp_batch in batch_array:
-			# Adjust for dynamic shapes
-			if temp_batch.shape != self.bindings["images"].shape:
-				if self.is_trt10:
-					self.context.set_input_shape("images", temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
-				else:
-					i_in = self.model_.get_binding_index("images")
-					i_out = self.model_.get_binding_index("output")
-					self.context.set_binding_shape(i_in, temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					output_shape = tuple(self.context.get_binding_shape(i_out))
-					self.bindings["output"].data.resize_(output_shape)
-
-			s = self.bindings["images"].shape
-			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
-
-			self.binding_addrs["images"] = int(temp_batch.data_ptr())
-
-			# Execute inference
-			self.context.execute_v2(list(self.binding_addrs.values()))
-			features = self.bindings["output"].data
-			resultant_features.append(features.clone())
-
-		if len(resultant_features) == 1:
-			return resultant_features[0]
-		else:
-			rslt_features = torch.cat(resultant_features, dim=0)
-			rslt_features = rslt_features[: im_batch.shape[0]]
-			return rslt_features
-
-	def load_model(self, w):
-		print(f"Loading {w} for TensorRT inference...")
-		# self.checker.check_packages(("nvidia-tensorrt",))
-		try:
-			import tensorrt as trt  # TensorRT library
-		except ImportError:
-			raise ImportError("Please install tensorrt to use this backend.")
-
-		if self.device.type == "cpu":
-			if torch.cuda.is_available():
-				self.device = torch.device("cuda:0")
-			else:
-				raise ValueError("CUDA device not available for TensorRT inference.")
-
-		Binding = namedtuple("Binding", ("name", "dtype", "shape", "data", "ptr"))
-		logger = trt.Logger(trt.Logger.INFO)
-
-		# Deserialize the engine
-		with open(w, "rb") as f:
-			with trt.Runtime(logger) as runtime:
-				self.model_ = runtime.deserialize_cuda_engine(f.read())
-
-		# Execution context
-		self.context = self.model_.create_execution_context()
-		self.bindings = OrderedDict()
-
-		self.is_trt10 = not hasattr(self.model_, "num_bindings")
-		num = range(self.model_.num_io_tensors) if self.is_trt10 else range(self.model_.num_bindings)
-
-		# Parse bindings
-		for index in num:
-			if self.is_trt10:
-				name = self.model_.get_tensor_name(index)
-				dtype = trt.nptype(self.model_.get_tensor_dtype(name))
-				is_input = self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
-				if is_input and -1 in tuple(self.model_.get_tensor_shape(name)):
-						self.context.set_input_shape(name, tuple(self.model_.get_tensor_profile_shape(name, 0)[1]))
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_tensor_shape(name))
-
-			else:
-				name = self.model_.get_binding_name(index)
-				dtype = trt.nptype(self.model_.get_binding_dtype(index))
-				is_input = self.model_.binding_is_input(index)
-
-				# Handle dynamic shapes
-				if is_input and -1 in self.model_.get_binding_shape(index):
-					profile_index = 0
-					min_shape, opt_shape, max_shape = self.model_.get_profile_shape(profile_index, index)
-					self.context.set_binding_shape(index, opt_shape)
-
-				if is_input and dtype == np.float16:
-					self.fp16 = True
-
-				shape = tuple(self.context.get_binding_shape(index))
-			data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(self.device)
-			self.bindings[name] = Binding(name, dtype, shape, data, int(data.data_ptr()))
-
-		self.binding_addrs = OrderedDict((n, d.ptr) for n, d in self.bindings.items())
-
-	def forward(self, im_batch):
-		temp_im_batch = im_batch.clone()
-		batch_array = []
-		inp_batch = im_batch.shape[0]
-		out_batch = self.bindings["output"].shape[0]
-		resultant_features = []
-
-		# Divide batch to sub batches
-		while inp_batch > out_batch:
-			batch_array.append(temp_im_batch[:out_batch])
-			temp_im_batch = temp_im_batch[out_batch:]
-			inp_batch = temp_im_batch.shape[0]
-		if temp_im_batch.shape[0] > 0:
-			batch_array.append(temp_im_batch)
-
-		for temp_batch in batch_array:
-			# Adjust for dynamic shapes
-			if temp_batch.shape != self.bindings["images"].shape:
-				if self.is_trt10:
-					self.context.set_input_shape("images", temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					self.bindings["output"].data.resize_(tuple(self.context.get_tensor_shape("output")))
-				else:
-					i_in = self.model_.get_binding_index("images")
-					i_out = self.model_.get_binding_index("output")
-					self.context.set_binding_shape(i_in, temp_batch.shape)
-					self.bindings["images"] = self.bindings["images"]._replace(shape=temp_batch.shape)
-					output_shape = tuple(self.context.get_binding_shape(i_out))
-					self.bindings["output"].data.resize_(output_shape)
-
-			s = self.bindings["images"].shape
-			assert temp_batch.shape == s, f"Input size {temp_batch.shape} does not match model size {s}"
-
-			self.binding_addrs["images"] = int(temp_batch.data_ptr())
-
-			# Execute inference
-			self.context.execute_v2(list(self.binding_addrs.values()))
-			features = self.bindings["output"].data
-			resultant_features.append(features.clone())
-
-		if len(resultant_features) == 1:
-			return resultant_features[0]
-		else:
-			rslt_features = torch.cat(resultant_features, dim=0)
-			rslt_features = rslt_features[: im_batch.shape[0]]
-			return rslt_features
+    def __init__(self, engine_path, device=None):
+        import hashlib
+
+        import requests
+
+        self.device = device or (
+            torch.device("cuda")
+            if torch.cuda.is_available()
+            else torch.device("cpu")
+        )
+        self.fp16 = False
+        self.model_ = None
+        self.context = None
+        self.bindings = None
+        self.binding_addrs = None
+        self.is_trt10 = False
+        # Download engine if engine_path is a URL
+        if engine_path.startswith("http://") or engine_path.startswith(
+            "https://"
+        ):
+            # Use a hash of the URL for filename
+            engine_hash = hashlib.md5(engine_path.encode()).hexdigest()
+            filename = f"trt_engine_{engine_hash}.engine"
+            cache_dir = os.path.expanduser("~/.cache/ethology/tensorrt/")
+            os.makedirs(cache_dir, exist_ok=True)
+            cached_file = os.path.join(cache_dir, filename)
+            if not os.path.exists(cached_file):
+                print(
+                    f"[TensorRT] Downloading engine from {engine_path} to {cached_file}"
+                )
+                with requests.get(engine_path, stream=True) as r:
+                    r.raise_for_status()
+                    with open(cached_file, "wb") as f:
+                        for chunk in r.iter_content(chunk_size=8192):
+                            f.write(chunk)
+            else:
+                print(f"[TensorRT] Using cached engine at {cached_file}")
+            self.engine_path = cached_file
+        else:
+            self.engine_path = engine_path
+        self.load_model(self.engine_path)
+
+    def load_model(self, w):
+        print(f"Loading {w} for TensorRT inference...")
+        try:
+            import pycuda.autoinit  # noqa: F401
+            import pycuda.driver as cuda
+            import tensorrt as trt
+        except ImportError:
+            raise ImportError(
+                "TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libnvinfer.so.8 is available in LD_LIBRARY_PATH."
+            )
+
+        if self.device.type == "cpu":
+            if torch.cuda.is_available():
+                self.device = torch.device("cuda:0")
+            else:
+                raise ValueError(
+                    "CUDA device not available for TensorRT inference."
+                )
+
+        Binding = namedtuple(
+            "Binding", ("name", "dtype", "shape", "data", "ptr")
+        )
+        logger = trt.Logger(trt.Logger.INFO)
+
+        # Deserialize the engine
+        with open(w, "rb") as f, trt.Runtime(logger) as runtime:
+            self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+        # Execution context
+        self.context = self.model_.create_execution_context()
+        self.bindings = OrderedDict()
+
+        self.is_trt10 = not hasattr(self.model_, "num_bindings")
+        num = (
+            range(self.model_.num_io_tensors)
+            if self.is_trt10
+            else range(self.model_.num_bindings)
+        )
+
+        # Parse bindings
+        for index in num:
+            if self.is_trt10:
+                name = self.model_.get_tensor_name(index)
+                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+                is_input = (
+                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+                )
+                if is_input and -1 in tuple(
+                    self.model_.get_tensor_shape(name)
+                ):
+                    self.context.set_input_shape(
+                        name,
+                        tuple(
+                            self.model_.get_tensor_profile_shape(name, 0)[1]
+                        ),
+                    )
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_tensor_shape(name))
+
+            else:
+                name = self.model_.get_binding_name(index)
+                dtype = trt.nptype(self.model_.get_binding_dtype(index))
+                is_input = self.model_.binding_is_input(index)
+
+                # Handle dynamic shapes
+                if is_input and -1 in self.model_.get_binding_shape(index):
+                    profile_index = 0
+                    min_shape, opt_shape, max_shape = (
+                        self.model_.get_profile_shape(profile_index, index)
+                    )
+                    self.context.set_binding_shape(index, opt_shape)
+
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_binding_shape(index))
+            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
+                self.device
+            )
+            self.bindings[name] = Binding(
+                name, dtype, shape, data, int(data.data_ptr())
+            )
+
+        self.binding_addrs = OrderedDict(
+            (n, d.ptr) for n, d in self.bindings.items()
+        )
+
+        # Execution context
+        self.context = self.model_.create_execution_context()
+        self.bindings = OrderedDict()
+
+        self.is_trt10 = not hasattr(self.model_, "num_bindings")
+        num = (
+            range(self.model_.num_io_tensors)
+            if self.is_trt10
+            else range(self.model_.num_bindings)
+        )
+
+        # Parse bindings
+        for index in num:
+            if self.is_trt10:
+                name = self.model_.get_tensor_name(index)
+                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+                is_input = (
+                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+                )
+                if is_input and -1 in tuple(
+                    self.model_.get_tensor_shape(name)
+                ):
+                    self.context.set_input_shape(
+                        name,
+                        tuple(
+                            self.model_.get_tensor_profile_shape(name, 0)[1]
+                        ),
+                    )
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_tensor_shape(name))
+
+            else:
+                name = self.model_.get_binding_name(index)
+                dtype = trt.nptype(self.model_.get_binding_dtype(index))
+                is_input = self.model_.binding_is_input(index)
+
+                # Handle dynamic shapes
+                if is_input and -1 in self.model_.get_binding_shape(index):
+                    profile_index = 0
+                    min_shape, opt_shape, max_shape = (
+                        self.model_.get_profile_shape(profile_index, index)
+                    )
+                    self.context.set_binding_shape(index, opt_shape)
+
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_binding_shape(index))
+            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
+                self.device
+            )
+            self.bindings[name] = Binding(
+                name, dtype, shape, data, int(data.data_ptr())
+            )
+
+        self.binding_addrs = OrderedDict(
+            (n, d.ptr) for n, d in self.bindings.items()
+        )
+
+    def forward(self, im_batch):
+        temp_im_batch = im_batch.clone()
+        batch_array = []
+        inp_batch = im_batch.shape[0]
+        out_batch = self.bindings["output"].shape[0]
+        resultant_features = []
+
+        # Divide batch to sub batches
+        while inp_batch > out_batch:
+            batch_array.append(temp_im_batch[:out_batch])
+            temp_im_batch = temp_im_batch[out_batch:]
+            inp_batch = temp_im_batch.shape[0]
+        if temp_im_batch.shape[0] > 0:
+            batch_array.append(temp_im_batch)
+
+        for temp_batch in batch_array:
+            # Adjust for dynamic shapes
+            if temp_batch.shape != self.bindings["images"].shape:
+                if self.is_trt10:
+                    self.context.set_input_shape("images", temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    self.bindings["output"].data.resize_(
+                        tuple(self.context.get_tensor_shape("output"))
+                    )
+                else:
+                    i_in = self.model_.get_binding_index("images")
+                    i_out = self.model_.get_binding_index("output")
+                    self.context.set_binding_shape(i_in, temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    output_shape = tuple(self.context.get_binding_shape(i_out))
+                    self.bindings["output"].data.resize_(output_shape)
+
+            s = self.bindings["images"].shape
+            assert temp_batch.shape == s, (
+                f"Input size {temp_batch.shape} does not match model size {s}"
+            )
+
+            self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+            # Execute inference
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            features = self.bindings["output"].data
+            resultant_features.append(features.clone())
+
+        if len(resultant_features) == 1:
+            return resultant_features[0]
+        else:
+            rslt_features = torch.cat(resultant_features, dim=0)
+            rslt_features = rslt_features[: im_batch.shape[0]]
+            return rslt_features
+
+    def load_model(self, w):
+        print(f"Loading {w} for TensorRT inference...")
+        # self.checker.check_packages(("nvidia-tensorrt",))
+        try:
+            import tensorrt as trt  # TensorRT library
+        except ImportError:
+            raise ImportError("Please install tensorrt to use this backend.")
+
+        if self.device.type == "cpu":
+            if torch.cuda.is_available():
+                self.device = torch.device("cuda:0")
+            else:
+                raise ValueError(
+                    "CUDA device not available for TensorRT inference."
+                )
+
+        Binding = namedtuple(
+            "Binding", ("name", "dtype", "shape", "data", "ptr")
+        )
+        logger = trt.Logger(trt.Logger.INFO)
+
+        # Deserialize the engine
+        with open(w, "rb") as f, trt.Runtime(logger) as runtime:
+            self.model_ = runtime.deserialize_cuda_engine(f.read())
+
+        # Execution context
+        self.context = self.model_.create_execution_context()
+        self.bindings = OrderedDict()
+
+        self.is_trt10 = not hasattr(self.model_, "num_bindings")
+        num = (
+            range(self.model_.num_io_tensors)
+            if self.is_trt10
+            else range(self.model_.num_bindings)
+        )
+
+        # Parse bindings
+        for index in num:
+            if self.is_trt10:
+                name = self.model_.get_tensor_name(index)
+                dtype = trt.nptype(self.model_.get_tensor_dtype(name))
+                is_input = (
+                    self.model_.get_tensor_mode(name) == trt.TensorIOMode.INPUT
+                )
+                if is_input and -1 in tuple(
+                    self.model_.get_tensor_shape(name)
+                ):
+                    self.context.set_input_shape(
+                        name,
+                        tuple(
+                            self.model_.get_tensor_profile_shape(name, 0)[1]
+                        ),
+                    )
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_tensor_shape(name))
+
+            else:
+                name = self.model_.get_binding_name(index)
+                dtype = trt.nptype(self.model_.get_binding_dtype(index))
+                is_input = self.model_.binding_is_input(index)
+
+                # Handle dynamic shapes
+                if is_input and -1 in self.model_.get_binding_shape(index):
+                    profile_index = 0
+                    min_shape, opt_shape, max_shape = (
+                        self.model_.get_profile_shape(profile_index, index)
+                    )
+                    self.context.set_binding_shape(index, opt_shape)
+
+                if is_input and dtype == np.float16:
+                    self.fp16 = True
+
+                shape = tuple(self.context.get_binding_shape(index))
+            data = torch.from_numpy(np.empty(shape, dtype=dtype)).to(
+                self.device
+            )
+            self.bindings[name] = Binding(
+                name, dtype, shape, data, int(data.data_ptr())
+            )
+
+        self.binding_addrs = OrderedDict(
+            (n, d.ptr) for n, d in self.bindings.items()
+        )
+
+    def forward(self, im_batch):
+        temp_im_batch = im_batch.clone()
+        batch_array = []
+        inp_batch = im_batch.shape[0]
+        out_batch = self.bindings["output"].shape[0]
+        resultant_features = []
+
+        # Divide batch to sub batches
+        while inp_batch > out_batch:
+            batch_array.append(temp_im_batch[:out_batch])
+            temp_im_batch = temp_im_batch[out_batch:]
+            inp_batch = temp_im_batch.shape[0]
+        if temp_im_batch.shape[0] > 0:
+            batch_array.append(temp_im_batch)
+
+        for temp_batch in batch_array:
+            # Adjust for dynamic shapes
+            if temp_batch.shape != self.bindings["images"].shape:
+                if self.is_trt10:
+                    self.context.set_input_shape("images", temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    self.bindings["output"].data.resize_(
+                        tuple(self.context.get_tensor_shape("output"))
+                    )
+                else:
+                    i_in = self.model_.get_binding_index("images")
+                    i_out = self.model_.get_binding_index("output")
+                    self.context.set_binding_shape(i_in, temp_batch.shape)
+                    self.bindings["images"] = self.bindings["images"]._replace(
+                        shape=temp_batch.shape
+                    )
+                    output_shape = tuple(self.context.get_binding_shape(i_out))
+                    self.bindings["output"].data.resize_(output_shape)
+
+            s = self.bindings["images"].shape
+            assert temp_batch.shape == s, (
+                f"Input size {temp_batch.shape} does not match model size {s}"
+            )
+
+            self.binding_addrs["images"] = int(temp_batch.data_ptr())
+
+            # Execute inference
+            self.context.execute_v2(list(self.binding_addrs.values()))
+            features = self.bindings["output"].data
+            resultant_features.append(features.clone())
+
+        if len(resultant_features) == 1:
+            return resultant_features[0]
+        else:
+            rslt_features = torch.cat(resultant_features, dim=0)
+            rslt_features = rslt_features[: im_batch.shape[0]]
+            return rslt_features
diff --git a/ethology/reid/backends/tflite_backend.py b/ethology/reid/backends/tflite_backend.py
index b0a7b707..eb10d4e8 100644
--- a/ethology/reid/backends/tflite_backend.py
+++ b/ethology/reid/backends/tflite_backend.py
@@ -4,37 +4,39 @@
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+
 # Note: LOGGER can be replaced with print or a local logger if needed
 
+
 class TFLiteBackend(BaseModelBackend):
-	"""
-	A class to handle TensorFlow Lite model inference with dynamic batch size support.
-	"""
-	def __init__(self, weights: Path, device: str, half: bool):
-		super().__init__(weights, device, half)
-		self.nhwc = True
-		self.half = False
-
-	def load_model(self, w):
-		# self.checker.check_packages(("tensorflow",))
-		print(f"Loading {str(w)} for TensorFlow Lite inference...")
-		import tensorflow as tf
-		self.interpreter = tf.lite.Interpreter(model_path=str(w))
-		self.interpreter.allocate_tensors()
-		self.input_details = self.interpreter.get_input_details()
-		self.output_details = self.interpreter.get_output_details()
-		self.current_allocated_batch_size = self.input_details[0]["shape"][0]
-
-	def forward(self, im_batch: torch.Tensor) -> np.ndarray:
-		im_batch = im_batch.cpu().numpy()
-		batch_size = im_batch.shape[0]
-		if batch_size != self.current_allocated_batch_size:
-			self.interpreter.resize_tensor_input(
-				self.input_details[0]["index"], [batch_size, 256, 128, 3]
-			)
-			self.interpreter.allocate_tensors()
-			self.current_allocated_batch_size = batch_size
-		self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
-		self.interpreter.invoke()
-		features = self.interpreter.get_tensor(self.output_details[0]["index"])
-		return features
+    """A class to handle TensorFlow Lite model inference with dynamic batch size support."""
+
+    def __init__(self, weights: Path, device: str, half: bool):
+        super().__init__(weights, device, half)
+        self.nhwc = True
+        self.half = False
+
+    def load_model(self, w):
+        # self.checker.check_packages(("tensorflow",))
+        print(f"Loading {str(w)} for TensorFlow Lite inference...")
+        import tensorflow as tf
+
+        self.interpreter = tf.lite.Interpreter(model_path=str(w))
+        self.interpreter.allocate_tensors()
+        self.input_details = self.interpreter.get_input_details()
+        self.output_details = self.interpreter.get_output_details()
+        self.current_allocated_batch_size = self.input_details[0]["shape"][0]
+
+    def forward(self, im_batch: torch.Tensor) -> np.ndarray:
+        """Run TFLite inference, re-allocating tensors when the batch size changes."""
+        im_batch = im_batch.cpu().numpy()
+        batch_size = im_batch.shape[0]
+        if batch_size != self.current_allocated_batch_size:
+            self.interpreter.resize_tensor_input(
+                self.input_details[0]["index"], list(im_batch.shape)
+            )
+            self.interpreter.allocate_tensors()
+            self.current_allocated_batch_size = batch_size
+        self.interpreter.set_tensor(self.input_details[0]["index"], im_batch)
+        self.interpreter.invoke()
+        return self.interpreter.get_tensor(self.output_details[0]["index"])
diff --git a/ethology/reid/backends/torchscript_backend.py b/ethology/reid/backends/torchscript_backend.py
index b6602171..1142fcc4 100644
--- a/ethology/reid/backends/torchscript_backend.py
+++ b/ethology/reid/backends/torchscript_backend.py
@@ -1,20 +1,21 @@
 import torch
 
 from ethology.reid.backends.base_backend import BaseModelBackend
+
 # Note: LOGGER can be replaced with print or a local logger if needed
 
-class TorchscriptBackend(BaseModelBackend):
 
-	def __init__(self, weights, device, half):
-		super().__init__(weights, device, half)
-		self.nhwc = False
-		self.half = half
+class TorchscriptBackend(BaseModelBackend):
+    def __init__(self, weights, device, half):
+        super().__init__(weights, device, half)
+        self.nhwc = False
+        self.half = half
 
-	def load_model(self, w):
-		print(f"Loading {w} for TorchScript inference...")
-		self.model = torch.jit.load(w)
-		self.model.half() if self.half else self.model.float()
+    def load_model(self, w):
+        print(f"Loading {w} for TorchScript inference...")
+        self.model = torch.jit.load(w)
+        self.model.half() if self.half else self.model.float()
 
-	def forward(self, im_batch):
-		features = self.model(im_batch)
-		return features
+    def forward(self, im_batch):
+        features = self.model(im_batch)
+        return features
diff --git a/ethology/reid/core/auto_backend.py b/ethology/reid/core/auto_backend.py
index 6f43eba2..22f2c4e2 100644
--- a/ethology/reid/core/auto_backend.py
+++ b/ethology/reid/core/auto_backend.py
@@ -1,74 +1,89 @@
-
 from pathlib import Path
-from typing import Tuple, Union
+
 import torch
+
 from ethology.reid.backends.onnx_backend import ONNXBackend
 from ethology.reid.backends.openvino_backend import OpenVinoBackend
 from ethology.reid.backends.pytorch_backend import PyTorchBackend
+
 try:
-	from ethology.reid.backends.tensorrt_backend import TensorRTBackend
+    from ethology.reid.backends.tensorrt_backend import TensorRTBackend
 except ImportError:
-	class TensorRTBackend:
-		def __init__(self, *args, **kwargs):
-			raise ImportError("TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH.")
+
+    class TensorRTBackend:
+        def __init__(self, *args, **kwargs):
+            raise ImportError(
+                "TensorRT and pycuda are required for TensorRTBackend. Please install them and ensure libcudnn.so.8 is available in LD_LIBRARY_PATH."
+            )
+
+
 from ethology.reid.backends.tflite_backend import TFLiteBackend
 from ethology.reid.backends.torchscript_backend import TorchscriptBackend
+
 # from ethology.reid.core import export_formats  # If needed, implement or copy export_formats
 # from ethology.utils import WEIGHTS  # If needed, implement or set WEIGHTS
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 # from ethology.utils.torch_utils import select_device  # If needed, implement or set select_device
 
+
 class ReidAutoBackend:
-	def __init__(
-		self,
-		weights: Path,
-		device: torch.device = torch.device("cpu"),
-		half: bool = False,
-	):
-		super().__init__()
-		w = weights[0] if isinstance(weights, list) else weights
-		(
-			self.pt,
-			self.pth,
-			self.jit,
-			self.onnx,
-			self.xml,
-			self.engine,
-			self.tflite,
-		) = self.model_type(w)
-		self.weights = weights
-		self.device = device  # For simplicity, skip select_device for now
-		self.half = half
-		self.model = self.get_backend()
+    def __init__(
+        self,
+        weights: Path,
+        device: torch.device = torch.device("cpu"),
+        half: bool = False,
+    ):
+        super().__init__()
+        w = weights[0] if isinstance(weights, list) else weights
+        (
+            self.pt,
+            self.pth,
+            self.jit,
+            self.onnx,
+            self.xml,
+            self.engine,
+            self.tflite,
+        ) = self.model_type(w)
+        self.weights = weights
+        self.device = device  # For simplicity, skip select_device for now
+        self.half = half
+        self.model = self.get_backend()
 
-	def get_backend(self):
-		backend_map = {
-			self.pt or self.pth: PyTorchBackend,
-			self.jit: TorchscriptBackend,
-			self.onnx: ONNXBackend,
-			self.engine: TensorRTBackend,
-			self.xml: OpenVinoBackend,
-			self.tflite: TFLiteBackend,
-		}
-		for condition, backend_class in backend_map.items():
-			if condition:
-				return backend_class(self.weights, self.device, self.half)
-		raise RuntimeError("This model framework is not supported yet!")
+    def get_backend(self):
+        backend_map = {
+            self.pt or self.pth: PyTorchBackend,
+            self.jit: TorchscriptBackend,
+            self.onnx: ONNXBackend,
+            self.engine: TensorRTBackend,
+            self.xml: OpenVinoBackend,
+            self.tflite: TFLiteBackend,
+        }
+        for condition, backend_class in backend_map.items():
+            if condition:
+                return backend_class(self.weights, self.device, self.half)
+        raise RuntimeError("This model framework is not supported yet!")
 
-	def check_suffix(self, file: Path = "osnet_x0_25_msmt17.pt", suffix: Union[str, Tuple[str, ...]] = (".pt",), msg: str = ""):
-		suffix = [suffix] if isinstance(suffix, str) else list(suffix)
-		files = [file] if isinstance(file, (str, Path)) else list(file)
-		for f in files:
-			file_suffix = Path(f).suffix.lower()
-			if file_suffix and file_suffix not in suffix:
-				print(f"File {f} does not have an acceptable suffix. Expected: {suffix}")
+    def check_suffix(
+        self,
+        file: Path = "osnet_x0_25_msmt17.pt",
+        suffix: str | tuple[str, ...] = (".pt",),
+        msg: str = "",
+    ):
+        suffix = [suffix] if isinstance(suffix, str) else list(suffix)
+        files = [file] if isinstance(file, (str, Path)) else list(file)
+        for f in files:
+            file_suffix = Path(f).suffix.lower()
+            if file_suffix and file_suffix not in suffix:
+                print(
+                    f"File {f} does not have an acceptable suffix. Expected: {suffix}"
+                )
 
-	def model_type(self, p: Path) -> Tuple[bool, ...]:
-		# For demo, just check for .pt
-		sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
-		self.check_suffix(p, sf)
-		types = [str(Path(p)).endswith(s) for s in sf]
-		# OpenVINO explicit check
-		if Path(p).suffix in ['.xml', '.bin']:
-			types[3] = True
-		return tuple(types)
+    def model_type(self, p: Path) -> tuple[bool, ...]:
+        """Return one flag per suffix in ``sf``, True where ``p`` matches it."""
+        sf = [".pt", ".pth", ".jit", ".onnx", ".xml", ".engine", ".tflite"]
+        self.check_suffix(p, sf)
+        types = [str(Path(p)).endswith(s) for s in sf]
+        # OpenVINO explicit check: .xml/.bin must raise the xml flag, not onnx
+        if Path(p).suffix in [".xml", ".bin"]:
+            types[sf.index(".xml")] = True
+        return tuple(types)
diff --git a/ethology/reid/core/config.py b/ethology/reid/core/config.py
index 926c0cc9..dc17cc14 100644
--- a/ethology/reid/core/config.py
+++ b/ethology/reid/core/config.py
@@ -1,16 +1,16 @@
 MODEL_TYPES = [
-	"resnet50",
-	"resnet101",
-	"mlfn",
-	"hacnn",
-	"mobilenetv2_x1_0",
-	"mobilenetv2_x1_4",
-	"osnet_x1_0",
-	"osnet_x0_75",
-	"osnet_x0_5",
-	"osnet_x0_25",
-	"osnet_ibn_x1_0",
-	"osnet_ain_x1_0",
-	"lmbn_n",
-	"clip",
+    "resnet50",
+    "resnet101",
+    "mlfn",
+    "hacnn",
+    "mobilenetv2_x1_0",
+    "mobilenetv2_x1_4",
+    "osnet_x1_0",
+    "osnet_x0_75",
+    "osnet_x0_5",
+    "osnet_x0_25",
+    "osnet_ibn_x1_0",
+    "osnet_ain_x1_0",
+    "lmbn_n",
+    "clip",
 ]
diff --git a/ethology/reid/core/factory.py b/ethology/reid/core/factory.py
index bc8b6ab1..27406383 100644
--- a/ethology/reid/core/factory.py
+++ b/ethology/reid/core/factory.py
@@ -1,30 +1,44 @@
-
 # Import model constructors from ethology's local backbones
 from ethology.reid.backbones.hacnn import HACNN
 from ethology.reid.backbones.mlfn import mlfn
-from ethology.reid.backbones.mobilenetv2 import mobilenetv2_x1_0, mobilenetv2_x1_4
-from ethology.reid.backbones.osnet import osnet_ibn_x1_0, osnet_x0_5, osnet_x0_25, osnet_x0_75, osnet_x1_0
-from ethology.reid.backbones.osnet_ain import osnet_ain_x0_5, osnet_ain_x0_25, osnet_ain_x0_75, osnet_ain_x1_0
+from ethology.reid.backbones.mobilenetv2 import (
+    mobilenetv2_x1_0,
+    mobilenetv2_x1_4,
+)
+from ethology.reid.backbones.osnet import (
+    osnet_ibn_x1_0,
+    osnet_x0_5,
+    osnet_x0_25,
+    osnet_x0_75,
+    osnet_x1_0,
+)
+from ethology.reid.backbones.osnet_ain import (
+    osnet_ain_x0_5,
+    osnet_ain_x0_25,
+    osnet_ain_x0_75,
+    osnet_ain_x1_0,
+)
 from ethology.reid.backbones.resnet import resnet50, resnet101
+
 # from ethology.reid.backbones.lmbn.lmbn_n import LMBN_n  # If present
 # from ethology.reid.backbones.clip.make_model import make_model  # If present
 
 MODEL_FACTORY = {
-	"resnet50": resnet50,
-	"resnet101": resnet101,
-	"mobilenetv2_x1_0": mobilenetv2_x1_0,
-	"mobilenetv2_x1_4": mobilenetv2_x1_4,
-	"hacnn": HACNN,
-	"mlfn": mlfn,
-	"osnet_x1_0": osnet_x1_0,
-	"osnet_x0_75": osnet_x0_75,
-	"osnet_x0_5": osnet_x0_5,
-	"osnet_x0_25": osnet_x0_25,
-	"osnet_ibn_x1_0": osnet_ibn_x1_0,
-	"osnet_ain_x1_0": osnet_ain_x1_0,
-	"osnet_ain_x0_75": osnet_ain_x0_75,
-	"osnet_ain_x0_5": osnet_ain_x0_5,
-	"osnet_ain_x0_25": osnet_ain_x0_25,
-	# "lmbn_n": LMBN_n,  # Uncomment if implemented
-	# "clip": make_model,  # Uncomment if implemented
+    "resnet50": resnet50,
+    "resnet101": resnet101,
+    "mobilenetv2_x1_0": mobilenetv2_x1_0,
+    "mobilenetv2_x1_4": mobilenetv2_x1_4,
+    "hacnn": HACNN,
+    "mlfn": mlfn,
+    "osnet_x1_0": osnet_x1_0,
+    "osnet_x0_75": osnet_x0_75,
+    "osnet_x0_5": osnet_x0_5,
+    "osnet_x0_25": osnet_x0_25,
+    "osnet_ibn_x1_0": osnet_ibn_x1_0,
+    "osnet_ain_x1_0": osnet_ain_x1_0,
+    "osnet_ain_x0_75": osnet_ain_x0_75,
+    "osnet_ain_x0_5": osnet_ain_x0_5,
+    "osnet_ain_x0_25": osnet_ain_x0_25,
+    # "lmbn_n": LMBN_n,  # Uncomment if implemented
+    # "clip": make_model,  # Uncomment if implemented
 }
diff --git a/ethology/reid/core/handler.py b/ethology/reid/core/handler.py
index b5e51391..ba521ab2 100644
--- a/ethology/reid/core/handler.py
+++ b/ethology/reid/core/handler.py
@@ -2,32 +2,35 @@
 
 # Thin wrapper to use BoxMOT ReID models in ethology
 from pathlib import Path
-from typing import Union
-import numpy as np
 
+import numpy as np
 
 # Import ethology's local ReID handler
 from ethology.reid.core.reid_handler import ReID as EthologyReID
 
+
 class ReIDHandler:
-    """
-    Ethology ReID handler using local models and backends.
-    """
-    def __init__(self, weights: Union[str, Path], device='cpu', half=False):
+    """Ethology ReID handler using local models and backends."""
+
+    def __init__(self, weights: str | Path, device="cpu", half=False):
         self.model = EthologyReID(weights=weights, device=device, half=half)
 
-    def extract_features(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
-        """
-        Extract feature embeddings for detections in a frame.
+    def extract_features(
+        self, frame: np.ndarray, dets: np.ndarray
+    ) -> np.ndarray:
+        """Extract feature embeddings for detections in a frame.
+
         Parameters
         ----------
         frame : np.ndarray
             (H, W, C) BGR image.
         dets : np.ndarray
             (N, 6) array of detections (x1, y1, x2, y2, conf, cls).
+
         Returns
         -------
         np.ndarray
             (N, D) feature embeddings.
+
         """
         return self.model(frame, dets)
diff --git a/ethology/reid/core/registry.py b/ethology/reid/core/registry.py
index 333cff2f..4b9c27fd 100644
--- a/ethology/reid/core/registry.py
+++ b/ethology/reid/core/registry.py
@@ -1,71 +1,88 @@
-
 from collections import OrderedDict
+
 import torch
-from ethology.reid.core.config import MODEL_TYPES  #, NR_CLASSES_DICT, TRAINED_URLS
+
+from ethology.reid.core.config import (
+    MODEL_TYPES,  # NR_CLASSES_DICT, TRAINED_URLS imports are disabled
+)
 from ethology.reid.core.factory import MODEL_FACTORY
+
 # from ethology.utils import logger as LOGGER  # If needed, implement or set LOGGER
 
+
 class ReIDModelRegistry:
-	"""Encapsulates model registration and related utilities."""
+    """Encapsulates model registration and related utilities."""
 
-	@staticmethod
-	def show_downloadable_models():
-		# LOGGER.info("Available .pt ReID models for automatic download")
-		# LOGGER.info(list(TRAINED_URLS.keys()))
-		pass
+    @staticmethod
+    def show_downloadable_models():
+        # LOGGER.info("Available .pt ReID models for automatic download")
+        # LOGGER.info(list(TRAINED_URLS.keys()))
+        pass
 
-	@staticmethod
-	def get_model_name(model):
-		for name in MODEL_TYPES:
-			if name in model.name:
-				return name
-		return None
+    @staticmethod
+    def get_model_name(model):
+        for name in MODEL_TYPES:
+            if name in model.name:
+                return name
+        return None
 
-	@staticmethod
-	def get_model_url(model):
-		# return TRAINED_URLS.get(model.name, None)
-		return None
+    @staticmethod
+    def get_model_url(model):
+        # return TRAINED_URLS.get(model.name, None)
+        return None
 
-	@staticmethod
-	def load_pretrained_weights(model, weight_path):
-		device = "cpu" if not torch.cuda.is_available() else None
-		checkpoint = torch.load(
-			weight_path,
-			map_location=torch.device("cpu") if device == "cpu" else None,
-			weights_only=False,
-			encoding='latin1',
-		)
-		state_dict = checkpoint.get("state_dict", checkpoint)
-		model_dict = model.state_dict()
-		new_state_dict = OrderedDict()
-		matched_layers, discarded_layers = [], []
-		for k, v in state_dict.items():
-			key = k[7:] if k.startswith("module.") else k
-			if key in model_dict and model_dict[key].size() == v.size():
-				new_state_dict[key] = v
-				matched_layers.append(key)
-			else:
-				discarded_layers.append(key)
-		model_dict.update(new_state_dict)
-		model.load_state_dict(model_dict)
+    @staticmethod
+    def load_pretrained_weights(model, weight_path):
+        device = "cpu" if not torch.cuda.is_available() else None
+        checkpoint = torch.load(
+            weight_path,
+            map_location=torch.device("cpu") if device == "cpu" else None,
+            weights_only=False,
+            encoding="latin1",
+        )
+        state_dict = checkpoint.get("state_dict", checkpoint)
+        model_dict = model.state_dict()
+        new_state_dict = OrderedDict()
+        matched_layers, discarded_layers = [], []
+        for k, v in state_dict.items():
+            key = k[7:] if k.startswith("module.") else k
+            if key in model_dict and model_dict[key].size() == v.size():
+                new_state_dict[key] = v
+                matched_layers.append(key)
+            else:
+                discarded_layers.append(key)
+        model_dict.update(new_state_dict)
+        model.load_state_dict(model_dict)
 
-	@staticmethod
-	def show_available_models():
-		# LOGGER.info("Available models:")
-		# LOGGER.info(list(MODEL_FACTORY.keys()))
-		pass
+    @staticmethod
+    def show_available_models():
+        # LOGGER.info("Available models:")
+        # LOGGER.info(list(MODEL_FACTORY.keys()))
+        pass
 
-	@staticmethod
-	def get_nr_classes(weights):
-		# dataset_key = weights.name.split("_")[1]
-		# return NR_CLASSES_DICT.get(dataset_key, 1)
-		return 1
+    @staticmethod
+    def get_nr_classes(weights):
+        # dataset_key = weights.name.split("_")[1]
+        # return NR_CLASSES_DICT.get(dataset_key, 1)
+        return 1
 
-	@staticmethod
-	def build_model(name, weights, num_classes, loss="softmax", pretrained=True, use_gpu=True):
-		if name not in MODEL_FACTORY:
-			available = list(MODEL_FACTORY.keys())
-			raise KeyError(f"Unknown model '{name}'. Must be one of {available}")
-		return MODEL_FACTORY[name](
-			num_classes=num_classes, loss=loss, pretrained=pretrained, use_gpu=use_gpu
-		)
+    @staticmethod
+    def build_model(
+        name,
+        weights,
+        num_classes,
+        loss="softmax",
+        pretrained=True,
+        use_gpu=True,
+    ):
+        if name not in MODEL_FACTORY:
+            available = list(MODEL_FACTORY.keys())
+            raise KeyError(
+                f"Unknown model '{name}'. Must be one of {available}"
+            )
+        return MODEL_FACTORY[name](
+            num_classes=num_classes,
+            loss=loss,
+            pretrained=pretrained,
+            use_gpu=use_gpu,
+        )
diff --git a/ethology/reid/core/reid_handler.py b/ethology/reid/core/reid_handler.py
index 2c72658a..62d42209 100644
--- a/ethology/reid/core/reid_handler.py
+++ b/ethology/reid/core/reid_handler.py
@@ -1,28 +1,33 @@
-
 from pathlib import Path
-from typing import Union
+
 import numpy as np
+
 from ethology.reid.core.auto_backend import ReidAutoBackend
 
+
 class ReID:
-	def __init__(self, weights: Union[str, Path], device='cpu', half=False):
-		self.weights = Path(weights)
-		self.device = device
-		self.half = half
-		self.backend = ReidAutoBackend(weights=self.weights, device=device, half=half)
-		self.model = self.backend.model
-
-	def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
-		"""
-		Extract features for detections in a frame.
-		Args:
-			frame: (H, W, C) BGR image
-			dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
-		Returns:
-			embs: (N, D) embeddings.
-		"""
-		if dets.shape[0] == 0:
-			return np.empty((0, 0))
-		xyxy = dets[:, :4]
-		embs = self.model.get_features(xyxy, frame)
-		return embs
+    def __init__(self, weights: str | Path, device="cpu", half=False):
+        self.weights = Path(weights)
+        self.device = device
+        self.half = half
+        self.backend = ReidAutoBackend(
+            weights=self.weights, device=device, half=half
+        )
+        self.model = self.backend.model
+
+    def __call__(self, frame: np.ndarray, dets: np.ndarray) -> np.ndarray:
+        """Extract features for detections in a frame.
+
+        Args:
+            frame: (H, W, C) BGR image.
+            dets: (N, 6) detections (x1, y1, x2, y2, conf, cls) or similar.
+
+        Returns:
+            embs: (N, D) embeddings.
+
+        """
+        if dets.shape[0] == 0:
+            return np.empty((0, 0))
+        xyxy = dets[:, :4]
+        embs = self.model.get_features(xyxy, frame)
+        return embs
diff --git a/examples/reid_mot_example.py b/examples/reid_mot_example.py
index 3862e1dd..304fd9b1 100644
--- a/examples/reid_mot_example.py
+++ b/examples/reid_mot_example.py
@@ -1,5 +1,4 @@
-"""
-Example: Using the new ReID trajectory utility with MOT results
+"""Example: Using the new ReID trajectory utility with MOT results
 
 This script demonstrates how to use the ethology reid trajectory handler with a sample MOT output.
 """
@@ -8,16 +7,16 @@
 
 # Example: Dummy MOT results (replace with your actual MOT output)
 mot_results = [
-    {'id': 1, 'trajectory': [(0, 0), (1, 1), (2, 2)]},
-    {'id': 2, 'trajectory': [(5, 5), (6, 6), (7, 7)]},
+    {"id": 1, "trajectory": [(0, 0), (1, 1), (2, 2)]},
+    {"id": 2, "trajectory": [(5, 5), (6, 6), (7, 7)]},
 ]
 
 # Initialize the handler (adjust parameters as needed)
-reid_handler = ReIDTrajectoryHandler(model_name='osnet', device='cpu')
+reid_handler = ReIDTrajectoryHandler(model_name="osnet", device="cpu")
 
 # Run re-identification on the MOT results
 reid_results = reid_handler.reidentify(mot_results)
 
-print('ReID Results:')
+print("ReID Results:")
 for item in reid_results:
     print(item)
diff --git a/tests/test_unit/test_reid_handler.py b/tests/test_unit/test_reid_handler.py
index 3a5146cf..bc8a199c 100644
--- a/tests/test_unit/test_reid_handler.py
+++ b/tests/test_unit/test_reid_handler.py
@@ -1,12 +1,16 @@
 import numpy as np
+
 from ethology.reid.core.handler import ReIDHandler
 
+
 def test_extract_features_shape():
-    handler = ReIDHandler(weights='osnet_x0_25_imagenet.pth')
+    handler = ReIDHandler(weights="osnet_x0_25_imagenet.pth")
     frame = np.random.randint(0, 255, (128, 64, 3), dtype=np.uint8)
-    dets = np.array([
-        [10, 10, 50, 100, 0.9, 1],
-        [60, 20, 100, 110, 0.8, 2],
-    ])
+    dets = np.array(
+        [
+            [10, 10, 50, 100, 0.9, 1],
+            [60, 20, 100, 110, 0.8, 2],
+        ]
+    )
     feats = handler.extract_features(frame, dets)
     assert feats.shape[0] == dets.shape[0]

From a377384b42706a5185ceb6a602dae899816fab96 Mon Sep 17 00:00:00 2001
From: AnandMayank <anandmayank698@gmail.com>
Date: Wed, 18 Feb 2026 20:16:58 +0530
Subject: [PATCH 10/12] fix(reid): address linter and docstring issues in
 resnet, mlfn, mobilenetv2 backbones

---
 ethology/reid/backbones/mlfn.py        | 13 ++--
 ethology/reid/backbones/mobilenetv2.py | 82 ++++++++++++++------------
 ethology/reid/backbones/resnet.py      |  5 +-
 3 files changed, 54 insertions(+), 46 deletions(-)

diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
index 3d04d003..7e490d0a 100644
--- a/ethology/reid/backbones/mlfn.py
+++ b/ethology/reid/backbones/mlfn.py
@@ -5,6 +5,10 @@
 from torch import nn
 from torch.nn import functional as F
 
+"""
+MLFN backbone for person re-identification.
+"""
+ 
 __all__ = ["mlfn"]
 model_urls = {
     # training epoch = 5, top1 = 51.6
@@ -21,7 +25,7 @@ def __init__(
         mid_channels = out_channels // 2
 
         # Factor Modules
-        self.fm_conv1 = nn.Conv2d(in_channels, mid_channels, 1, bias=False)
+        super().__init__()
         self.fm_bn1 = nn.BatchNorm2d(mid_channels)
         self.fm_conv2 = nn.Conv2d(
             mid_channels,
@@ -114,7 +118,7 @@ def __init__(
         **kwargs,
     ):
         super(MLFN, self).__init__()
-        self.loss = loss
+        channels=None,
         self.groups = groups
 
         # first convolutional layer
@@ -264,9 +268,10 @@ def mlfn(num_classes, loss="softmax", pretrained=True, **kwargs):
         warnings.warn(
             "The imagenet pretrained weights need to be manually downloaded from {}".format(
                 model_urls["imagenet"]
-            )
+            ),
+            stacklevel=2,
         )
     return model
 
 
-# Copied from boxmot/boxmot/reid/backbones/mlfn.py
+
diff --git a/ethology/reid/backbones/mobilenetv2.py b/ethology/reid/backbones/mobilenetv2.py
index b3e69186..5d004eb3 100644
--- a/ethology/reid/backbones/mobilenetv2.py
+++ b/ethology/reid/backbones/mobilenetv2.py
@@ -1,4 +1,7 @@
-# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
+
+"""
+MobileNetV2 backbone for person re-identification.
+"""
 
 
 import torch.utils.model_zoo as model_zoo
@@ -29,7 +32,10 @@ class ConvBlock(nn.Module):
             g (int): number of blocked connections from input channels
                     to output channels (default: 1).
 
-    """
+	def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
+		super().__init__()
+		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
+		self.bn = nn.BatchNorm2d(out_c)
 
     def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
         super(ConvBlock, self).__init__()
@@ -43,18 +49,18 @@ def forward(self, x):
 
 
 class Bottleneck(nn.Module):
-    def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
-        super(Bottleneck, self).__init__()
-        mid_channels = in_channels * expansion_factor
-        self.use_residual = stride == 1 and in_channels == out_channels
-        self.conv1 = ConvBlock(in_channels, mid_channels, 1)
-        self.dwconv2 = ConvBlock(
-            mid_channels, mid_channels, 3, stride, 1, g=mid_channels
-        )
-        self.conv3 = nn.Sequential(
-            nn.Conv2d(mid_channels, out_channels, 1, bias=False),
-            nn.BatchNorm2d(out_channels),
-        )
+	def __init__(self, in_channels, out_channels, expansion_factor, stride=1):
+		super().__init__()
+		mid_channels = in_channels * expansion_factor
+		self.use_residual = stride == 1 and in_channels == out_channels
+		self.conv1 = ConvBlock(in_channels, mid_channels, 1)
+		self.dwconv2 = ConvBlock(
+			mid_channels, mid_channels, 3, stride, 1, g=mid_channels
+		)
+		self.conv3 = nn.Sequential(
+			nn.Conv2d(mid_channels, out_channels, 1, bias=False),
+			nn.BatchNorm2d(out_channels),
+		)
 
     def forward(self, x):
         m = self.conv1(x)
@@ -78,19 +84,19 @@ class MobileNetV2(nn.Module):
             - ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
     """
 
-    def __init__(
-        self,
-        num_classes,
-        width_mult=1,
-        loss="softmax",
-        fc_dims=None,
-        dropout_p=None,
-        **kwargs,
-    ):
-        super(MobileNetV2, self).__init__()
-        self.loss = loss
-        self.in_channels = int(32 * width_mult)
-        self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
+	def __init__(
+		self,
+		num_classes,
+		width_mult=1,
+		loss="softmax",
+		fc_dims=None,
+		dropout_p=None,
+		**kwargs,
+	):
+		super().__init__()
+		self.loss = loss
+		self.in_channels = int(32 * width_mult)
+		self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
 
         # construct layers
         self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
@@ -125,17 +131,17 @@ def __init__(
 
         self._init_params()
 
-    def _make_layer(self, block, t, c, n, s):
-        # t: expansion factor
-        # c: output channels
-        # n: number of blocks
-        # s: stride for first layer
-        layers = []
-        layers.append(block(self.in_channels, c, t, s))
-        self.in_channels = c
-        for i in range(1, n):
-            layers.append(block(self.in_channels, c, t))
-        return nn.Sequential(*layers)
+	def _make_layer(self, block, t, c, n, s):
+		# t: expansion factor
+		# c: output channels
+		# n: number of blocks
+		# s: stride for first layer
+		layers = []
+		layers.append(block(self.in_channels, c, t, s))
+		self.in_channels = c
+		for _ in range(1, n):
+			layers.append(block(self.in_channels, c, t))
+		return nn.Sequential(*layers)
 
     def _construct_fc_layer(self, fc_dims, input_dim, dropout_p=None):
         """Constructs fully connected layer.
diff --git a/ethology/reid/backbones/resnet.py b/ethology/reid/backbones/resnet.py
index 7cf28df1..0efa708e 100644
--- a/ethology/reid/backbones/resnet.py
+++ b/ethology/reid/backbones/resnet.py
@@ -1,13 +1,10 @@
 # Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
 
-<<<<<<< HEAD
-"""Code source: https://github.com/pytorch/vision"""
-=======
+
 """
 Code source: https://github.com/pytorch/vision
 """
 from __future__ import absolute_import, division
->>>>>>> a4dd694 (style(reid): fix ruff errors in hacnn.py and mlfn.py\n\n- Add missing docstrings\n- Use super() instead of super(Class, self)\n- Avoid mutable default arguments\n- Fix long lines and other ruff issues)
 
 import torch.utils.model_zoo as model_zoo
 from torch import nn

From d1a8ac749b8e6ef40bfcebcc8fa4e29a74b51a80 Mon Sep 17 00:00:00 2001
From: AnandMayank <anandmayank698@gmail.com>
Date: Wed, 18 Feb 2026 20:26:55 +0530
Subject: [PATCH 11/12] fix(reid): resolve linter, docstring, and merge issues
 in backbone files

---
 ethology/reid/backbones/mlfn.py        | 17 +++++-----
 ethology/reid/backbones/mobilenetv2.py | 43 ++++++++++++--------------
 ethology/reid/backbones/resnet.py      | 26 ++--------------
 3 files changed, 29 insertions(+), 57 deletions(-)

diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
index 7e490d0a..bdd019e1 100644
--- a/ethology/reid/backbones/mlfn.py
+++ b/ethology/reid/backbones/mlfn.py
@@ -1,3 +1,4 @@
+
 """MLFN backbone for person re-identification."""
 
 import torch
@@ -5,10 +6,6 @@
 from torch import nn
 from torch.nn import functional as F
 
-"""
-MLFN backbone for person re-identification.
-"""
- 
 __all__ = ["mlfn"]
 model_urls = {
     # training epoch = 5, top1 = 51.6
@@ -20,7 +17,7 @@ class MLFNBlock(nn.Module):
     def __init__(
         self, in_channels, out_channels, stride, fsm_channels, groups=32
     ):
-        super(MLFNBlock, self).__init__()
+        super().__init__()
         self.groups = groups
         mid_channels = out_channels // 2
 
@@ -113,11 +110,13 @@ def __init__(
         num_classes,
         loss="softmax",
         groups=32,
-        channels=[64, 256, 512, 1024, 2048],
+        channels=None,
         embed_dim=1024,
         **kwargs,
     ):
-        super(MLFN, self).__init__()
+        super().__init__()
+        if channels is None:
+            channels = [64, 256, 512, 1024, 2048]
         channels=None,
         self.groups = groups
 
@@ -244,9 +243,9 @@ def forward(self, x):
 
 
 def init_pretrained_weights(model, model_url):
-    """Initializes model with pretrained weights.
+    """Initialize model with pretrained weights.
 
-    Layers that don't match with pretrained layers in name or size are kept unchanged.
+    Keep layers unchanged if they don't match pretrained layers in name or size.
     """
     pretrain_dict = model_zoo.load_url(model_url)
     model_dict = model.state_dict()
diff --git a/ethology/reid/backbones/mobilenetv2.py b/ethology/reid/backbones/mobilenetv2.py
index 5d004eb3..d0f5becd 100644
--- a/ethology/reid/backbones/mobilenetv2.py
+++ b/ethology/reid/backbones/mobilenetv2.py
@@ -37,12 +37,7 @@ def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
 		self.conv = nn.Conv2d(in_c, out_c, k, stride=s, padding=p, bias=False, groups=g)
 		self.bn = nn.BatchNorm2d(out_c)
 
-    def __init__(self, in_c, out_c, k, s=1, p=0, g=1):
-        super(ConvBlock, self).__init__()
-        self.conv = nn.Conv2d(
-            in_c, out_c, k, stride=s, padding=p, bias=False, groups=g
-        )
-        self.bn = nn.BatchNorm2d(out_c)
+    # Only keep the correct __init__
 
     def forward(self, x):
         return F.relu6(self.bn(self.conv(x)))
@@ -73,30 +68,30 @@ def forward(self, x):
 
 
 class MobileNetV2(nn.Module):
-    """MobileNetV2.
+    """
+    MobileNetV2 backbone for person re-identification.
 
     Reference:
-            Sandler et al. MobileNetV2: Inverted Residuals and
-            Linear Bottlenecks. CVPR 2018.
+        Sandler et al. MobileNetV2: Inverted Residuals and Linear Bottlenecks. CVPR 2018.
 
     Public keys:
-            - ``mobilenetv2_x1_0``: MobileNetV2 x1.0.
-            - ``mobilenetv2_x1_4``: MobileNetV2 x1.4.
+        - mobilenetv2_x1_0: MobileNetV2 x1.0.
+        - mobilenetv2_x1_4: MobileNetV2 x1.4.
     """
 
-	def __init__(
-		self,
-		num_classes,
-		width_mult=1,
-		loss="softmax",
-		fc_dims=None,
-		dropout_p=None,
-		**kwargs,
-	):
-		super().__init__()
-		self.loss = loss
-		self.in_channels = int(32 * width_mult)
-		self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
+    def __init__(
+        self,
+        num_classes,
+        width_mult=1,
+        loss="softmax",
+        fc_dims=None,
+        dropout_p=None,
+        **kwargs,
+    ):
+        super().__init__()
+        self.loss = loss
+        self.in_channels = int(32 * width_mult)
+        self.feature_dim = int(1280 * width_mult) if width_mult > 1 else 1280
 
         # construct layers
         self.conv1 = ConvBlock(3, self.in_channels, 3, s=2, p=1)
diff --git a/ethology/reid/backbones/resnet.py b/ethology/reid/backbones/resnet.py
index 0efa708e..8ee75172 100644
--- a/ethology/reid/backbones/resnet.py
+++ b/ethology/reid/backbones/resnet.py
@@ -1,16 +1,13 @@
-# Mikel Broström 🔥 BoxMOT 🧾 AGPL-3.0 license
-
 
 """
 Code source: https://github.com/pytorch/vision
 """
-from __future__ import absolute_import, division
 
+from __future__ import absolute_import, division
 import torch.utils.model_zoo as model_zoo
 from torch import nn
 
 __all__ = [
-<<<<<<< HEAD
     "resnet18",
     "resnet34",
     "resnet50",
@@ -29,26 +26,7 @@
     "resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
     "resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
     "resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
-=======
-	"resnet18",
-	"resnet34",
-	"resnet50",
-	"resnet101",
-	"resnet152",
-	"resnext50_32x4d",
-	"resnext101_32x8d",
-	"resnet50_fc512",
-]
-
-model_urls = {
-	"resnet18": "https://download.pytorch.org/models/resnet18-5c106cde.pth",
-	"resnet34": "https://download.pytorch.org/models/resnet34-333f7ec4.pth",
-	"resnet50": "https://download.pytorch.org/models/resnet50-19c8e357.pth",
-	"resnet101": "https://download.pytorch.org/models/resnet101-5d3b4d8f.pth",
-	"resnet152": "https://download.pytorch.org/models/resnet152-b121ed2d.pth",
-	"resnext50_32x4d": "https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth",
-	"resnext101_32x8d": "https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth",
->>>>>>> a4dd694 (style(reid): fix ruff errors in hacnn.py and mlfn.py\n\n- Add missing docstrings\n- Use super() instead of super(Class, self)\n- Avoid mutable default arguments\n- Fix long lines and other ruff issues)
+}
 }
 
 # ...existing code for conv3x3, conv1x1, BasicBlock, Bottleneck, ResNet, init_pretrained_weights, and instantiation functions...

From d572f490159473af1d34045aa9d8465a163340b5 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 18 Feb 2026 14:57:54 +0000
Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 ethology/reid/backbones/mlfn.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/ethology/reid/backbones/mlfn.py b/ethology/reid/backbones/mlfn.py
index bdd019e1..c638a5d5 100644
--- a/ethology/reid/backbones/mlfn.py
+++ b/ethology/reid/backbones/mlfn.py
@@ -1,4 +1,3 @@
-
 """MLFN backbone for person re-identification."""
 
 import torch
@@ -117,7 +116,7 @@ def __init__(
         super().__init__()
         if channels is None:
             channels = [64, 256, 512, 1024, 2048]
-        channels=None,
+        channels = (None,)
         self.groups = groups
 
         # first convolutional layer
@@ -271,6 +270,3 @@ def mlfn(num_classes, loss="softmax", pretrained=True, **kwargs):
             stacklevel=2,
         )
     return model
-
-
-