Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

🚀 Add MVTecAD2 dataset #2562

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
185ae8f
Rename MVTec dataset to MVTecAD dataset
samet-akcay Feb 10, 2025
ffb2f92
Update the test dataset paths
samet-akcay Feb 10, 2025
2199f80
Rename mvtec path to mvtec_ad
samet-akcay Feb 11, 2025
49e7d35
Rename mvtec_ad to mvtecad
samet-akcay Feb 11, 2025
8db23a1
Path fixes
samet-akcay Feb 11, 2025
da1c9d1
Path fixes
samet-akcay Feb 11, 2025
3279c9d
Merge branch 'main' into feature/data/mvtec2-ad
samet-akcay Feb 11, 2025
ec69b59
Add MVTecAD2 dataset and datamodule
samet-akcay Feb 12, 2025
1f10185
Fix engine unit tests
samet-akcay Feb 12, 2025
8713528
Fix mvtec path in inference tests
samet-akcay Feb 13, 2025
b37abee
Merge branch 'main' into feature/data/mvtec2-ad
samet-akcay Mar 5, 2025
f650629
Rename MVTec dataset path in tests
samet-akcay Mar 5, 2025
786c0c3
Fix mvtec path in conftest
samet-akcay Mar 6, 2025
0ce1810
remove redundant relative import
samet-akcay Mar 6, 2025
a13ad01
Increase the timeout
samet-akcay Mar 6, 2025
f526a47
Fix unit tests
samet-akcay Mar 6, 2025
8e49f67
Merge branch 'main' of github.com:openvinotoolkit/anomalib into featu…
samet-akcay Mar 6, 2025
bf075ab
Merge branch 'main' of github.com:openvinotoolkit/anomalib into imple…
samet-akcay Mar 6, 2025
b9e6488
Merge MVTecAD rename changes
samet-akcay Mar 6, 2025
6092504
Fix mvtec2 config file example
samet-akcay Mar 6, 2025
686ad7c
Merge main and resolve conflicts
samet-akcay Mar 6, 2025
11a43cf
Update the test split docstring
samet-akcay Mar 6, 2025
15d5958
Merge main and resolve conflicts
samet-akcay Mar 26, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 147 additions & 0 deletions examples/api/02_data/mvtecad2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
"""Example showing how to use the MVTec AD 2 dataset with Anomalib.

This example demonstrates how to:
1. Load and visualize the MVTec AD 2 dataset
2. Create a datamodule and use it for training
3. Access different test sets (public, private, mixed)
4. Work with custom transforms and visualization
"""

# Copyright (C) 2025 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import matplotlib.pyplot as plt
import torch
from torch.utils.data import DataLoader
from torchvision.transforms.v2 import Compose, Resize, ToDtype, ToImage

from anomalib.data import MVTecAD2
from anomalib.data.datasets.base.image import ImageItem
from anomalib.data.datasets.image.mvtecad2 import MVTecAD2Dataset, TestType
from anomalib.data.utils import Split

# 1. Basic Usage
print("1. Basic Usage")

# Build a datamodule for one category and let it prepare its datasets.
datamodule = MVTecAD2(
    root="./datasets/MVTec_AD_2",
    category="sheet_metal",
    train_batch_size=32,
    eval_batch_size=32,
    num_workers=8,
)
datamodule.setup()  # This will prepare the dataset

# Report the size of every split the datamodule exposes. The attributes are
# looked up lazily, one per iteration, so each line is printed before the next
# split is touched.
for _split_label, _split_attr in (
    ("training samples", "train_data"),
    ("validation samples", "val_data"),
    ("test samples (public)", "test_public_data"),
    ("test samples (private)", "test_private_data"),
    ("test samples (private mixed)", "test_private_mixed_data"),
):
    print(f"Number of {_split_label}: {len(getattr(datamodule, _split_attr))}")

# 2. Custom Transforms
print("\n2. Custom Transforms")

# Steps applied in order: wrap as an image tensor, resize to 256x256, then
# convert to float32 (``scale=True`` asks torchvision to rescale the values
# while converting).
_pipeline_steps = [
    ToImage(),
    Resize((256, 256)),
    ToDtype(torch.float32, scale=True),
]
transform = Compose(_pipeline_steps)

# Create dataset with custom transform: the same pipeline is handed to the
# train, validation and test stages.
_stage_augmentations = {
    "train_augmentations": transform,
    "val_augmentations": transform,
    "test_augmentations": transform,
}
datamodule = MVTecAD2(
    root="./datasets/MVTec_AD_2",
    category="sheet_metal",
    **_stage_augmentations,
)
datamodule.setup()

# 3. Different Test Sets
print("\n3. Accessing Different Test Sets")

# Get loaders for each test set (all three loaders are created before any
# batch is drawn).
public_loader, private_loader, mixed_loader = (
    datamodule.test_dataloader(test_type=_kind)
    for _kind in (TestType.PUBLIC, TestType.PRIVATE, TestType.PRIVATE_MIXED)
)

# Get sample batches: the first batch of each loader.
public_batch, private_batch, mixed_batch = (
    next(iter(_loader)) for _loader in (public_loader, private_loader, mixed_loader)
)

# Print the image tensor shape of each batch.
for _caption, _batch in (
    ("Public test batch shape:", public_batch),
    ("Private test batch shape:", private_batch),
    ("Private mixed test batch shape:", mixed_batch),
):
    print(_caption, _batch.image.shape)

# 4. Advanced Usage - Direct Dataset Access
print("\n4. Advanced Usage")

# Arguments shared by both datasets below.
_dataset_kwargs = {
    "root": "./datasets/MVTec_AD_2",
    "category": "sheet_metal",
    "augmentations": transform,
}

# Create datasets for each split
train_dataset = MVTecAD2Dataset(split=Split.TRAIN, **_dataset_kwargs)
test_dataset = MVTecAD2Dataset(
    split=Split.TEST,
    test_type=TestType.PUBLIC,  # Use public test set
    **_dataset_kwargs,
)

# Create dataloaders; each dataset supplies its own collate function, and only
# the training loader shuffles.
train_loader = DataLoader(
    train_dataset,
    batch_size=4,
    shuffle=True,
    collate_fn=train_dataset.collate_fn,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=4,
    shuffle=False,
    collate_fn=test_dataset.collate_fn,
)

# Get some sample images (first batch of each loader, train first)
train_samples, test_samples = next(iter(train_loader)), next(iter(test_loader))

print("Train Dataset:")
print(f"- Number of samples: {len(train_dataset)}")
print(f"- Image shape: {train_samples.image.shape}")
print(f"- Labels: {train_samples.gt_label}")

print("\nTest Dataset:")
print(f"- Number of samples: {len(test_dataset)}")
print(f"- Image shape: {test_samples.image.shape}")
print(f"- Labels: {test_samples.gt_label}")
# The mask is only reported when the batch actually carries one.
if getattr(test_samples, "gt_mask", None) is not None:
    print(f"- Mask shape: {test_samples.gt_mask.shape}")


# 5. Visualize some samples
def show_samples(samples: ImageItem, title: str, num_samples: int = 4) -> None:
    """Display the first few samples of a batch side by side.

    Although annotated as ``ImageItem``, the function indexes ``image``,
    ``gt_label`` and (when present) ``gt_mask`` along their first dimension,
    so it expects a batched object such as the ones yielded by the
    dataloaders in this example.

    Args:
        samples: Object exposing ``image`` and ``gt_label`` and, optionally,
            ``gt_mask``. Each ``image[i]`` is permuted with ``(1, 2, 0)``
            before plotting, i.e. it is assumed to be channel-first.
        title: Figure title.
        num_samples: Maximum number of samples to draw. Fewer panels are
            drawn when the batch holds fewer samples, instead of indexing
            past the end of the batch. Defaults to ``4``.

    Raises:
        ValueError: If ``image`` or ``gt_label`` is ``None``, or if there is
            nothing to draw (empty batch or ``num_samples < 1``).
    """
    if samples.image is None or samples.gt_label is None:
        msg = "Samples must have image and label data"
        raise ValueError(msg)

    # Clamp to the batch size: the last batch of a loader (or a tiny dataset)
    # can hold fewer than ``num_samples`` items.
    count = min(num_samples, len(samples.image))
    if count < 1:
        msg = "Nothing to display: need at least one sample and num_samples >= 1"
        raise ValueError(msg)

    masks = getattr(samples, "gt_mask", None)

    # ``squeeze=False`` keeps ``axes`` two-dimensional even for a single
    # panel, so the indexing below works for every ``count``. The width keeps
    # the original 15 inches for the default of four panels.
    fig, axes = plt.subplots(1, count, figsize=(3.75 * count, 4), squeeze=False)
    fig.suptitle(title)

    for i, ax in enumerate(axes[0]):
        img = samples.image[i].permute(1, 2, 0).numpy()
        ax.imshow(img)
        ax.axis("off")
        if masks is not None:
            # Overlay the ground-truth mask semi-transparently on the image.
            mask = masks[i].squeeze().numpy()
            ax.imshow(mask, alpha=0.3, cmap="Reds")
        label = "Normal" if samples.gt_label[i] == 0 else "Anomaly"
        ax.set_title(label)

    plt.tight_layout()
    plt.show()


# Show training samples (normal only), then test samples (mix of normal and
# anomalous).
for _batch, _heading in (
    (train_samples, "Training Samples (Normal)"),
    (test_samples, "Test Samples (Normal + Anomalous)"),
):
    show_samples(_batch, _heading)

if __name__ == "__main__":
    print("\nMVTec AD 2 Dataset example completed successfully!")
9 changes: 9 additions & 0 deletions examples/configs/data/mvtecad2.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Datamodule configuration for the MVTec AD 2 dataset.
# `class_path` names the class to instantiate; the keys under `init_args`
# are passed to its constructor.
class_path: anomalib.data.MVTecAD2
init_args:
  # Directory that holds the dataset on disk.
  root: "./datasets/MVTec_AD_2"
  # Object category to load.
  category: "sheet_metal"
  train_batch_size: 32
  eval_batch_size: 32
  num_workers: 8
  # NOTE(review): presumably selects the public / private / private-mixed
  # test set — confirm the accepted values against `TestType`.
  test_type: "public"
  # NOTE(review): null presumably means no fixed random seed — confirm.
  seed: null
4 changes: 3 additions & 1 deletion src/anomalib/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
# Datamodules
from .datamodules.base import AnomalibDataModule
from .datamodules.depth import DepthDataFormat, Folder3D, MVTec3D
from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTecAD, Visa
from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTec, MVTecAD, MVTecAD2, Visa
from .datamodules.video import Avenue, ShanghaiTech, UCSDped, VideoDataFormat

# Datasets
Expand Down Expand Up @@ -160,8 +160,10 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule
"ImageDataFormat",
"Kolektor",
"KolektorDataset",
"MVTec", # Deprecated, use MVTecAD instead
"MVTecAD",
"MVTecADDataset",
"MVTecAD2",
"Visa",
"VisaDataset",
# Video
Expand Down
3 changes: 3 additions & 0 deletions src/anomalib/data/datamodules/base/image.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,9 @@ def _create_val_split(self) -> None:
This handles sampling from train/test sets and optionally creating
synthetic anomalies.
"""
if self.val_split_mode == ValSplitMode.FROM_DIR:
# If the validation split mode is FROM_DIR, we don't need to create a validation set
return
if self.val_split_mode == ValSplitMode.FROM_TRAIN:
# randomly sample from train set
self.train_data, self.val_data = random_split(
Expand Down
7 changes: 6 additions & 1 deletion src/anomalib/data/datamodules/image/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
- ``Folder``: Custom folder structure with normal/abnormal images
- ``Kolektor``: Kolektor Surface-Defect Dataset
- ``MVTecAD``: MVTec Anomaly Detection Dataset
- ``MVTecAD2``: MVTec Anomaly Detection Dataset 2
- ``Visa``: Visual Anomaly (VisA) Dataset

Example:
Expand All @@ -30,6 +31,7 @@
from .folder import Folder
from .kolektor import Kolektor
from .mvtecad import MVTec, MVTecAD
from .mvtecad2 import MVTecAD2
from .visa import Visa


Expand All @@ -44,6 +46,7 @@ class ImageDataFormat(str, Enum):
- ``FOLDER_3D``: Custom folder structure for 3D images
- ``KOLEKTOR``: Kolektor Surface-Defect Dataset
- ``MVTEC_AD``: MVTec AD Dataset
- ``MVTEC_AD_2``: MVTec AD 2 Dataset
- ``MVTEC_3D``: MVTec 3D AD Dataset
- ``VISA``: Visual Anomaly (VisA) Dataset
"""
Expand All @@ -54,6 +57,7 @@ class ImageDataFormat(str, Enum):
FOLDER_3D = "folder_3d"
KOLEKTOR = "kolektor"
MVTEC_AD = "mvtecad"
MVTEC_AD_2 = "mvtecad2"
MVTEC_3D = "mvtec_3d"
VISA = "visa"

Expand All @@ -63,7 +67,8 @@ class ImageDataFormat(str, Enum):
"Datumaro",
"Folder",
"Kolektor",
"MVTec", # Include MVTec for backward compatibility
"MVTecAD",
"MVTec", # Include both for backward compatibility
"MVTecAD2",
"Visa",
]
Loading
Loading