From 185ae8f3bee90adb16d0614bd34a2ef0145dd4af Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Mon, 10 Feb 2025 14:04:36 +0000 Subject: [PATCH 01/19] Rename MVTec dataset to MVTecAD dataset Signed-off-by: Samet Akcay --- .github/ISSUE_TEMPLATE/bug_report.yaml | 2 +- README.md | 20 +- docs/source/markdown/get_started/anomalib.md | 2 +- docs/source/markdown/get_started/migration.md | 4 +- .../guides/how_to/data/datamodules.md | 4 +- .../markdown/guides/how_to/data/datasets.md | 6 +- .../guides/how_to/data/input_tiling.md | 8 +- .../markdown/guides/how_to/data/transforms.md | 18 +- .../how_to/models/feature_extractors.md | 4 +- .../how_to/training_on_intel_gpus/index.md | 6 +- .../reference/data/datamodules/image.md | 6 +- .../reference/data/datamodules/image/index.md | 4 +- .../reference/data/datamodules/image/mvtec.md | 2 +- .../reference/data/datamodules/index.md | 2 +- .../markdown/guides/reference/data/index.md | 2 +- .../reference/pipelines/benchmark/index.md | 2 +- .../folder/classification/cli/default.yaml | 2 +- .../data/transforms/datamodule_custom.txt | 2 +- .../transforms/datamodule_custom_cli.yaml | 2 +- .../data/transforms/datamodule_default.txt | 4 +- .../data/transforms/datamodule_train_eval.txt | 2 +- .../transforms/datamodule_train_eval_cli.yaml | 2 +- .../snippets/data/transforms/inference.txt | 2 +- .../data/transforms/inference_cli.yaml | 2 +- .../snippets/data/transforms/model_fit.txt | 2 +- .../snippets/data/transforms/model_fit_cli.sh | 2 +- .../snippets/inference/cli/lightning.txt | 4 +- docs/source/snippets/train/api/default.txt | 4 +- docs/source/snippets/train/cli/default.txt | 4 +- .../api/01_getting_started/basic_training.py | 6 +- examples/api/02_data/mvtec.py | 12 +- examples/api/03_models/efficient_ad.py | 6 +- examples/api/03_models/padim.py | 4 +- examples/api/03_models/patchcore.py | 4 +- examples/api/04_advanced/loggers.py | 4 +- .../api/05_pipelines/complete_pipeline.py | 4 +- .../cli/01_getting_started/basic_training.sh | 4 +- 
examples/cli/02_data/mvtec.sh | 10 +- examples/cli/04_advanced/custom_pipeline.sh | 6 +- examples/configs/data/mvtec.yaml | 2 +- .../001_getting_started.ipynb | 4 +- .../notebooks/100_datamodules/102_mvtec.ipynb | 4 +- examples/notebooks/100_datamodules/README.md | 2 +- .../notebooks/200_models/201_fastflow.ipynb | 4 +- .../600_loggers/601_mlflow_logging.ipynb | 4 +- .../notebooks/700_metrics/701a_aupimo.ipynb | 4 +- .../700_metrics/701b_aupimo_advanced_i.ipynb | 4 +- .../700_metrics/701c_aupimo_advanced_ii.ipynb | 4 +- examples/notebooks/README.md | 2 +- src/anomalib/cli/cli.py | 4 +- src/anomalib/data/__init__.py | 96 +++---- .../data/datamodules/image/__init__.py | 29 ++- src/anomalib/data/datamodules/image/mvtec.py | 62 +++-- .../data/datamodules/image/mvtec_ad.py | 236 +++++++++++++++++ src/anomalib/data/datasets/__init__.py | 10 +- src/anomalib/data/datasets/image/__init__.py | 10 +- src/anomalib/data/datasets/image/mvtec.py | 26 +- src/anomalib/data/datasets/image/mvtec_ad.py | 240 ++++++++++++++++++ src/anomalib/data/datasets/image/visa.py | 2 +- src/anomalib/engine/engine.py | 8 +- src/anomalib/models/__init__.py | 4 +- src/anomalib/models/image/__init__.py | 4 +- src/anomalib/models/image/cfa/README.md | 2 +- src/anomalib/models/image/cfa/__init__.py | 4 +- src/anomalib/models/image/cflow/README.md | 2 +- src/anomalib/models/image/csflow/README.md | 2 +- src/anomalib/models/image/dfkde/README.md | 2 +- src/anomalib/models/image/dfm/README.md | 2 +- src/anomalib/models/image/draem/README.md | 2 +- src/anomalib/models/image/dsr/README.md | 2 +- .../image/efficient_ad/lightning_model.py | 4 +- src/anomalib/models/image/fastflow/README.md | 2 +- .../models/image/fastflow/__init__.py | 4 +- .../models/image/fastflow/lightning_model.py | 4 +- src/anomalib/models/image/fre/__init__.py | 4 +- .../models/image/fre/lightning_model.py | 4 +- src/anomalib/models/image/ganomaly/README.md | 2 +- .../models/image/ganomaly/__init__.py | 4 +- 
.../models/image/ganomaly/lightning_model.py | 4 +- src/anomalib/models/image/padim/README.md | 2 +- .../models/image/padim/lightning_model.py | 8 +- src/anomalib/models/image/patchcore/README.md | 2 +- .../models/image/patchcore/__init__.py | 4 +- .../models/image/patchcore/lightning_model.py | 8 +- .../image/reverse_distillation/README.md | 2 +- .../image/reverse_distillation/__init__.py | 4 +- .../reverse_distillation/lightning_model.py | 4 +- src/anomalib/models/image/stfpm/README.md | 4 +- src/anomalib/models/image/stfpm/__init__.py | 4 +- .../models/image/stfpm/lightning_model.py | 8 +- .../models/image/supersimplenet/README.md | 6 +- .../image/supersimplenet/lightning_model.py | 4 +- src/anomalib/models/image/uflow/README.md | 10 +- src/anomalib/models/image/uflow/__init__.py | 4 +- .../models/image/uflow/lightning_model.py | 8 +- src/anomalib/models/image/vlm_ad/__init__.py | 4 +- .../models/image/vlm_ad/lightning_model.py | 4 +- src/anomalib/models/image/winclip/README.md | 40 +-- .../models/image/winclip/lightning_model.py | 4 +- src/anomalib/pipelines/benchmark/__init__.py | 4 +- src/anomalib/pipelines/benchmark/job.py | 8 +- src/anomalib/pipelines/benchmark/pipeline.py | 8 +- src/anomalib/utils/path.py | 4 +- tests/conftest.py | 6 +- tests/helpers/data.py | 2 +- tests/integration/cli/test_cli.py | 23 +- tests/integration/model/test_models.py | 4 +- tests/integration/pipelines/pipeline.yaml | 2 +- .../tools/upgrade/expected_draem_v1.yaml | 4 +- .../tools/upgrade/original_draem_v0.yaml | 2 +- .../unit/data/datamodule/image/test_mvtec.py | 10 +- tests/unit/engine/test_engine.py | 4 +- .../visualizer_callback/test_visualizer.py | 4 +- tests/unit/utils/test_visualizer.py | 4 +- tools/upgrade/config.py | 4 +- 115 files changed, 877 insertions(+), 351 deletions(-) create mode 100644 src/anomalib/data/datamodules/image/mvtec_ad.py create mode 100644 src/anomalib/data/datasets/image/mvtec_ad.py diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml 
b/.github/ISSUE_TEMPLATE/bug_report.yaml index a8fcc1a8f4..ce930eb926 100644 --- a/.github/ISSUE_TEMPLATE/bug_report.yaml +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -22,7 +22,7 @@ body: - "Avenue" - "BTec" - "Folder" - - "MVTec" + - "MVTecAD" - "UCSD" - "Other (please specify in the text field below)" validations: diff --git a/README.md b/README.md index d9e9a14da7..d79a279a75 100644 --- a/README.md +++ b/README.md @@ -122,12 +122,12 @@ Anomalib supports both API and CLI-based training approaches: ## 🔌 Python API ```python -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.models import Patchcore from anomalib.engine import Engine # Initialize components -datamodule = MVTec() +datamodule = MVTecAD() model = Patchcore() engine = Engine() @@ -139,10 +139,10 @@ engine.fit(datamodule=datamodule, model=model) ```bash # Train with default settings -anomalib train --model Patchcore --data anomalib.data.MVTec +anomalib train --model Patchcore --data anomalib.data.MVTecAD # Train with custom category -anomalib train --model Patchcore --data anomalib.data.MVTec --data.category transistor +anomalib train --model Patchcore --data anomalib.data.MVTecAD --data.category transistor # Train with config file anomalib train --config path/to/config.yaml @@ -168,12 +168,12 @@ predictions = engine.predict( ```bash # Basic prediction anomalib predict --model anomalib.models.Patchcore \ - --data anomalib.data.MVTec \ + --data anomalib.data.MVTecAD \ --ckpt_path path/to/model.ckpt # Prediction with results anomalib predict --model anomalib.models.Patchcore \ - --data anomalib.data.MVTec \ + --data anomalib.data.MVTecAD \ --ckpt_path path/to/model.ckpt \ --return_predictions ``` @@ -191,7 +191,7 @@ Ensure that you have PyTorch with XPU support installed.
For more information, p ## 🔌 API ```python -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine, SingleXPUStrategy, XPUAccelerator from anomalib.models import Stfpm @@ -199,13 +199,13 @@ engine = Engine( strategy=SingleXPUStrategy(), accelerator=XPUAccelerator(), ) -engine.train(Stfpm(), datamodule=MVTec()) +engine.train(Stfpm(), datamodule=MVTecAD()) ``` ## ⌨️ CLI ```bash -anomalib train --model Padim --data MVTec --trainer.accelerator xpu --trainer.strategy xpu_single +anomalib train --model Padim --data MVTecAD --trainer.accelerator xpu --trainer.strategy xpu_single ``` # ⚙️ Hyperparameter Optimization @@ -247,7 +247,7 @@ anomalib benchmark --config tools/benchmarking/benchmark_params.yaml > 💡 **Tip:** Check individual model performance in their respective README files: > -> - [Patchcore Results](src/anomalib/models/image/patchcore/README.md#mvtec-ad-dataset) +> - [Patchcore Results](src/anomalib/models/image/patchcore/README.md#mvtec-ad-dataset) > - [Other Models](src/anomalib/models/) # ✍️ Reference diff --git a/docs/source/markdown/get_started/anomalib.md b/docs/source/markdown/get_started/anomalib.md index 34a9f130e9..f6807eec1f 100644 --- a/docs/source/markdown/get_started/anomalib.md +++ b/docs/source/markdown/get_started/anomalib.md @@ -256,7 +256,7 @@ By using the configuration file above, you can run the experiment with the follo Anomalib provides a benchmarking tool to evaluate the performance of the anomaly detection models on a given dataset. The benchmarking tool can be used to evaluate the performance of the models on a given dataset, or to compare the performance of multiple models on a given dataset. -Each model in anomalib is benchmarked on a set of datasets, and the results are available in `src/anomalib/models/README.md`.
For example, the MVTec AD results for the Patchcore model are available in the corresponding [README.md](https://github.com/openvinotoolkit/anomalib/tree/main/src/anomalib/models/image/patchcore#mvtec-ad-dataset) file. +Each model in anomalib is benchmarked on a set of datasets, and the results are available in `src/anomalib/models/README.md`. For example, the MVTecAD results for the Patchcore model are available in the corresponding [README.md](https://github.com/openvinotoolkit/anomalib/tree/main/src/anomalib/models/image/patchcore#mvtec-ad-dataset) file. ::::{tab-set} diff --git a/docs/source/markdown/get_started/migration.md b/docs/source/markdown/get_started/migration.md index 65545e4787..fccd0d4052 100644 --- a/docs/source/markdown/get_started/migration.md +++ b/docs/source/markdown/get_started/migration.md @@ -69,9 +69,9 @@ between the old and new configuration files highlighted in a markdown diff forma +data: - name: mvtec - format: mvtec -+ class_path: anomalib.data.MVTec ++ class_path: anomalib.data.MVTecAD + init_args: -- path: ./datasets/MVTec +- path: ./datasets/MVTec + root: ./datasets/MVTec category: bottle image_size: 256 diff --git a/docs/source/markdown/guides/how_to/data/datamodules.md b/docs/source/markdown/guides/how_to/data/datamodules.md index 6be8a64b99..343aacca77 100644 --- a/docs/source/markdown/guides/how_to/data/datamodules.md +++ b/docs/source/markdown/guides/how_to/data/datamodules.md @@ -103,9 +103,9 @@ The data flow is: ### 1.
Image DataModule ```python -from anomalib.data import MVTec +from anomalib.data import MVTecAD -datamodule = MVTec( +datamodule = MVTecAD( root="./datasets/MVTec", category="bottle", train_batch_size=32, diff --git a/docs/source/markdown/guides/how_to/data/datasets.md b/docs/source/markdown/guides/how_to/data/datasets.md index 3a8a593984..b2a4c3274d 100644 --- a/docs/source/markdown/guides/how_to/data/datasets.md +++ b/docs/source/markdown/guides/how_to/data/datasets.md @@ -46,10 +46,10 @@ Anomalib supports different types of datasets based on modality: The most common type, supporting RGB images: ```python -from anomalib.data.datasets import MVTecDataset +from anomalib.data.datasets import MVTecADDataset -# Create MVTec dataset -dataset = MVTecDataset( +# Create MVTecAD dataset +dataset = MVTecADDataset( root="./datasets/MVTec", category="bottle", split="train" diff --git a/docs/source/markdown/guides/how_to/data/input_tiling.md b/docs/source/markdown/guides/how_to/data/input_tiling.md index 6747519bdd..4d6ed3d67d 100644 --- a/docs/source/markdown/guides/how_to/data/input_tiling.md +++ b/docs/source/markdown/guides/how_to/data/input_tiling.md @@ -56,13 +56,13 @@ To use tiling from the API, we need to initialize the {py:class}`TilerConfigurat :lineno-start: 1 :emphasize-lines: 12, 15 # Import the required modules -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import Padim from anomalib.callbacks import TilerConfigurationCallback # Initialize the datamodule and model -datamodule = MVTec(num_workers=0, image_size=(128, 128)) +datamodule = MVTecAD(num_workers=0, image_size=(128, 128)) model = Padim() # prepare tiling configuration callback @@ -87,7 +87,7 @@ We pass it as trainer.callback, and then provide the parameters: ```{code-block} bash :emphasize-lines: 2, 3, 4, 5 -anomalib train --model Padim --data anomalib.data.MVTec +anomalib train --model Padim --data anomalib.data.MVTecAD 
--trainer.callbacks anomalib.callbacks.tiler_configuration.TilerConfigurationCallback --trainer.callbacks.enable True --trainer.callbacks.tile_size 128 @@ -111,7 +111,7 @@ trainer.callbacks: Then use the config from the CLI: ```{code-block} bash -anomalib train --model Padim --data anomalib.data.MVTec --config config.yaml +anomalib train --model Padim --data anomalib.data.MVTecAD --config config.yaml ``` ::: diff --git a/docs/source/markdown/guides/how_to/data/transforms.md b/docs/source/markdown/guides/how_to/data/transforms.md index d3e9f7fc4b..6eae0eb3ec 100644 --- a/docs/source/markdown/guides/how_to/data/transforms.md +++ b/docs/source/markdown/guides/how_to/data/transforms.md @@ -127,10 +127,10 @@ The `Resize` transform will get added to the exported model graph, and applied t Data augmentation refers to the practice of applying transforms to input images to increase the variability in the dataset. By transforming the images, we effectively increase the sample size which helps improve a model's generalization and robustness to variations in real-world scenarios. Augmentations are often randomized to maximize variability between training runs and/or epochs. Some common augmentations include flipping, rotating, or scaling images, adjusting brightness or contrast, adding noise, and cropping. -In Anomalib, data augmentations are configured from the `DataModule` and applied by the `Dataset`. Augmentations can be configured separately for each of the subsets (train, val, test) to suit different use-cases such as training set enrichment or test-time augmentations (TTA). All datamodules in Anomalib have the `train_augmentations`, `val_augmentations` and `test_augmentations` arguments, to which the user can pass a set of augmentation transforms. The following example shows how to add some random augmentations to the training set of an MVTec dataset: +In Anomalib, data augmentations are configured from the `DataModule` and applied by the `Dataset`. 
Augmentations can be configured separately for each of the subsets (train, val, test) to suit different use-cases such as training set enrichment or test-time augmentations (TTA). All datamodules in Anomalib have the `train_augmentations`, `val_augmentations` and `test_augmentations` arguments, to which the user can pass a set of augmentation transforms. The following example shows how to add some random augmentations to the training set of an MVTecAD dataset: ```python -from anomalib.data import MVTec +from anomalib.data import MVTecAD from torchvision.transforms import v2 augmentations = v2.Compose([ @@ -142,7 +142,7 @@ augmentations = v2.Compose([ v2.RandomGrayscale(p=0.1), # Convert images to grayscale with 10% probability ]) -datamodule = MVTec( +datamodule = MVTecAD( category="transistor", train_augmentations=augmentations, val_augmentations=None, @@ -175,7 +175,7 @@ augmentations = Compose( RandomHorizontalFlip(p=0.5), Normalize(mean=[0.48145466, 0.4578275, 0.40821073], std=[0.26862954, 0.26130258, 0.27577711]), ) -datamodule = MVTec(train_augmentations=augmentations) +datamodule = MVTecAD(train_augmentations=augmentations) model = Padim() engine = Engine() engine.fit(model, datamodule=datamodule) @@ -183,7 +183,7 @@ engine.fit(model, datamodule=datamodule) # Correct: pass the random flip as an augmentation to the datamodule, and pass the updated # Normalize transform to a new PreProcessor instance. 
augmentations = RandomHorizontalFlip(p=0.5) -datamodule = MVTec(train_augmentations=augmentations) +datamodule = MVTecAD(train_augmentations=augmentations) transform = Compose( Resize(size=(256, 256)), @@ -206,7 +206,7 @@ augmentations = Compose( RandomHorizontalFlip(p=0.5), Resize(size=(224, 224)), # overruled by resize in default model-specific transform ) -datamodule = MVTec(augmentations=augmentations) +datamodule = MVTecAD(augmentations=augmentations) model = Padim() @@ -217,7 +217,7 @@ engine.fit(model, datamodule=datamodule) # updated pre-processor instance with the new image shape to the model. The final # image size will be 224x224. augmentations = RandomHorizontalFlip(p=0.5) -datamodule = MVTec(augmentations=augmentations) +datamodule = MVTecAD(augmentations=augmentations) pre_processor = Padim.configure_pre_processor(image_size=(224, 224)) model = Padim(pre_processor=pre_processor) @@ -240,14 +240,14 @@ transform = Compose( pre_processor = PreProcessor(transform=transform) model = Padim(pre_processor=pre_processor) -datamodule = MVTec() +datamodule = MVTecAD() engine = Engine() engine.fit(model, datamodule=datamodule) # Correct: Pass the transform to the datamodule as `train_augmentation`. 
augmentations = RandomHorizontalFlip(p=0.5) -datamodule = MVTec(train_augmentation=augmentations) +datamodule = MVTecAD(train_augmentation=augmentations) model = Padim() diff --git a/docs/source/markdown/guides/how_to/models/feature_extractors.md b/docs/source/markdown/guides/how_to/models/feature_extractors.md index 7959b3fdff..50f334bc7f 100644 --- a/docs/source/markdown/guides/how_to/models/feature_extractors.md +++ b/docs/source/markdown/guides/how_to/models/feature_extractors.md @@ -95,12 +95,12 @@ When using API, we need to specify `backbone` and `layers` when instantiating th :lineno-start: 1 :emphasize-lines: 9 # Import the required modules -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.models import Padim from anomalib.engine import Engine # Initialize the datamodule, model, and engine -datamodule = MVTec(num_workers=0) +datamodule = MVTecAD(num_workers=0) # Specify backbone and layers model = Padim(backbone="resnet18", layers=["layer1", "layer2"]) engine = Engine(image_metrics=["AUROC"], pixel_metrics=["AUROC"]) diff --git a/docs/source/markdown/guides/how_to/training_on_intel_gpus/index.md b/docs/source/markdown/guides/how_to/training_on_intel_gpus/index.md index 17888d2cc3..ff768335dd 100644 --- a/docs/source/markdown/guides/how_to/training_on_intel_gpus/index.md +++ b/docs/source/markdown/guides/how_to/training_on_intel_gpus/index.md @@ -34,7 +34,7 @@ If the command returns `True`, then your PyTorch installation supports XPU. 
## 🔌 API ```python -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine, SingleXPUStrategy, XPUAccelerator from anomalib.models import Stfpm @@ -42,11 +42,11 @@ engine = Engine( strategy=SingleXPUStrategy(), accelerator=XPUAccelerator(), ) -engine.train(Stfpm(), datamodule=MVTec()) +engine.train(Stfpm(), datamodule=MVTecAD()) ``` ## ⌨️ CLI ```bash -anomalib train --model Padim --data MVTec --trainer.accelerator xpu --trainer.strategy xpu_single +anomalib train --model Padim --data MVTecAD --trainer.accelerator xpu --trainer.strategy xpu_single ``` diff --git a/docs/source/markdown/guides/reference/data/datamodules/image.md b/docs/source/markdown/guides/reference/data/datamodules/image.md index c79a299a92..e4918e656a 100644 --- a/docs/source/markdown/guides/reference/data/datamodules/image.md +++ b/docs/source/markdown/guides/reference/data/datamodules/image.md @@ -35,8 +35,8 @@ Custom folder-based dataset organization. Surface defect detection in electrical commutators. ::: -:::{grid-item-card} MVTec -:link: anomalib.data.datamodules.image.MVTec +:::{grid-item-card} MVTecAD +:link: anomalib.data.datamodules.image.MVTecAD :link-type: doc Industrial anomaly detection benchmark. @@ -54,7 +54,7 @@ Visual inspection of surface anomalies. ```{eval-rst} .. automodule:: anomalib.data - :members: BTech, Datumaro, Folder, Kolektor, MVTec, Visa + :members: BTech, Datumaro, Folder, Kolektor, MVTecAD, Visa :undoc-members: :show-inheritance: ``` diff --git a/docs/source/markdown/guides/reference/data/datamodules/image/index.md b/docs/source/markdown/guides/reference/data/datamodules/image/index.md index 50e1c4a86d..a1ed63e7dd 100644 --- a/docs/source/markdown/guides/reference/data/datamodules/image/index.md +++ b/docs/source/markdown/guides/reference/data/datamodules/image/index.md @@ -35,11 +35,11 @@ Custom folder-based datamodule for organizing your own image dataset. Kolektor Surface-Defect dataset datamodule.
::: -:::{grid-item-card} MVTec +:::{grid-item-card} MVTecAD :link: mvtec :link-type: doc -MVTec AD dataset datamodule for unsupervised anomaly detection. +MVTecAD dataset datamodule for unsupervised anomaly detection. ::: :::{grid-item-card} Visa diff --git a/docs/source/markdown/guides/reference/data/datamodules/image/mvtec.md b/docs/source/markdown/guides/reference/data/datamodules/image/mvtec.md index 3ef6847c0d..c7a2918442 100644 --- a/docs/source/markdown/guides/reference/data/datamodules/image/mvtec.md +++ b/docs/source/markdown/guides/reference/data/datamodules/image/mvtec.md @@ -1,4 +1,4 @@ -# MVTec Datamodule +# MVTecAD Datamodule ```{eval-rst} .. automodule:: anomalib.data.datamodules.image.mvtec diff --git a/docs/source/markdown/guides/reference/data/datamodules/index.md b/docs/source/markdown/guides/reference/data/datamodules/index.md index 699c4e7b3c..84722b9fb4 100644 --- a/docs/source/markdown/guides/reference/data/datamodules/index.md +++ b/docs/source/markdown/guides/reference/data/datamodules/index.md @@ -35,7 +35,7 @@ Custom folder-based datamodule for organizing your own image dataset. Kolektor Surface-Defect dataset datamodule. ::: -:::{grid-item-card} MVTec +:::{grid-item-card} MVTecAD :link: image/mvtec :link-type: doc diff --git a/docs/source/markdown/guides/reference/data/index.md b/docs/source/markdown/guides/reference/data/index.md index 26635a0380..a549e831cb 100644 --- a/docs/source/markdown/guides/reference/data/index.md +++ b/docs/source/markdown/guides/reference/data/index.md @@ -23,7 +23,7 @@ Core data structures that define how data is represented and validated throughou :link-type: doc :class-card: custom-card -Ready-to-use PyTorch Dataset implementations of standard benchmark datasets (MVTec, BTech) and support for custom datasets across multiple modalities (Image, Video, Depth).
+Ready-to-use PyTorch Dataset implementations of standard benchmark datasets (MVTecAD, BTech) and support for custom datasets across multiple modalities (Image, Video, Depth). +++ [Learn more »](./datasets/index) diff --git a/docs/source/markdown/guides/reference/pipelines/benchmark/index.md b/docs/source/markdown/guides/reference/pipelines/benchmark/index.md index 93bae8627e..5a77f901c8 100644 --- a/docs/source/markdown/guides/reference/pipelines/benchmark/index.md +++ b/docs/source/markdown/guides/reference/pipelines/benchmark/index.md @@ -12,7 +12,7 @@ benchmark: class_path: grid_search: [Padim, Patchcore] data: - class_path: MVTec + class_path: MVTecAD init_args: category: grid: diff --git a/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml b/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml index d3b2befb4a..4d8abf93f9 100644 --- a/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml +++ b/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml @@ -1,6 +1,6 @@ class_path: anomalib.data.Folder init_args: - name: "MVTec" + name: "MVTecAD" root: "datasets/MVTec/transistor" normal_dir: "train/good" abnormal_dir: "test/bent_lead" diff --git a/docs/source/snippets/data/transforms/datamodule_custom.txt b/docs/source/snippets/data/transforms/datamodule_custom.txt index c8cbcbf635..295232f7c3 100644 --- a/docs/source/snippets/data/transforms/datamodule_custom.txt +++ b/docs/source/snippets/data/transforms/datamodule_custom.txt @@ -1,7 +1,7 @@ from torchvision.transforms.v2 import Resize transform = Resize((256, 256)) -datamodule = MVTec(transform=transform) +datamodule = MVTecAD(transform=transform) datamodule.prepare_data() datamodule.setup() diff --git a/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml b/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml index 9ce9dca453..c3cfbdb869 100644 ---
a/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml +++ b/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml @@ -1,4 +1,4 @@ -class_path: anomalib.data.MVTec +class_path: anomalib.data.MVTecAD init_args: root: ./datasets/MVTec category: bottle diff --git a/docs/source/snippets/data/transforms/datamodule_default.txt b/docs/source/snippets/data/transforms/datamodule_default.txt index f8b9494030..504677ef93 100644 --- a/docs/source/snippets/data/transforms/datamodule_default.txt +++ b/docs/source/snippets/data/transforms/datamodule_default.txt @@ -1,6 +1,6 @@ -from anomalib.data import MVTec +from anomalib.data import MVTecAD -datamodule = MVTec() +datamodule = MVTecAD() datamodule.prepare_data() datamodule.setup() diff --git a/docs/source/snippets/data/transforms/datamodule_train_eval.txt b/docs/source/snippets/data/transforms/datamodule_train_eval.txt index 625a19b724..647914c5b2 100644 --- a/docs/source/snippets/data/transforms/datamodule_train_eval.txt +++ b/docs/source/snippets/data/transforms/datamodule_train_eval.txt @@ -15,7 +15,7 @@ eval_transform = Compose( ], ) -datamodule = MVTec(train_transform=train_transform, eval_transform=eval_transform) +datamodule = MVTecAD(train_transform=train_transform, eval_transform=eval_transform) datamodule.prepare_data() datamodule.setup() diff --git a/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml b/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml index 19005b97bc..60edfe29ac 100644 --- a/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml +++ b/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml @@ -1,4 +1,4 @@ -class_path: anomalib.data.MVTec +class_path: anomalib.data.MVTecAD init_args: root: ./datasets/MVTec category: bottle diff --git a/docs/source/snippets/data/transforms/inference.txt b/docs/source/snippets/data/transforms/inference.txt index 914201fafc..a132d520ad 100644 --- 
a/docs/source/snippets/data/transforms/inference.txt +++ b/docs/source/snippets/data/transforms/inference.txt @@ -20,7 +20,7 @@ eval_transform = Compose( ], ) -datamodule = MVTec(train_transform=train_transform, eval_transform=eval_transform) +datamodule = MVTecAD(train_transform=train_transform, eval_transform=eval_transform) engine.fit(model, datamodule=datamodule) diff --git a/docs/source/snippets/data/transforms/inference_cli.yaml b/docs/source/snippets/data/transforms/inference_cli.yaml index 88e59fb258..e9d11062a4 100644 --- a/docs/source/snippets/data/transforms/inference_cli.yaml +++ b/docs/source/snippets/data/transforms/inference_cli.yaml @@ -1,4 +1,4 @@ -class_path: anomalib.data.MVTec +class_path: anomalib.data.MVTecAD init_args: root: ./datasets/MVTec category: bottle diff --git a/docs/source/snippets/data/transforms/model_fit.txt b/docs/source/snippets/data/transforms/model_fit.txt index 436a43618a..aaab09d541 100644 --- a/docs/source/snippets/data/transforms/model_fit.txt +++ b/docs/source/snippets/data/transforms/model_fit.txt @@ -1,7 +1,7 @@ from anomalib.engine import Engine # instantiate the datamodule without passing custom transforms -datamodule = MVTec() +datamodule = MVTecAD() # initially, the datamodule will not have any transforms defined datamodule.train_transform is None # True diff --git a/docs/source/snippets/data/transforms/model_fit_cli.sh b/docs/source/snippets/data/transforms/model_fit_cli.sh index de86235129..4efc3f5822 100644 --- a/docs/source/snippets/data/transforms/model_fit_cli.sh +++ b/docs/source/snippets/data/transforms/model_fit_cli.sh @@ -1 +1 @@ -anomalib fit --model Patchcore --data MVTec +anomalib fit --model Patchcore --data MVTecAD diff --git a/docs/source/snippets/inference/cli/lightning.txt b/docs/source/snippets/inference/cli/lightning.txt index 50ea377cf1..5d8eb52a1c 100644 --- a/docs/source/snippets/inference/cli/lightning.txt +++ b/docs/source/snippets/inference/cli/lightning.txt @@ -3,12 +3,12 @@ anomalib 
predict -h # Predict by using the default values. anomalib predict --model anomalib.models.Patchcore \ - --data anomalib.data.MVTec \ + --data anomalib.data.MVTecAD \ --ckpt_path # Predict by overriding arguments. anomalib predict --model anomalib.models.Patchcore \ - --data anomalib.data.MVTec \ + --data anomalib.data.MVTecAD \ --ckpt_path --return_predictions diff --git a/docs/source/snippets/train/api/default.txt b/docs/source/snippets/train/api/default.txt index 1fe6cb895c..40f0a3d8cc 100644 --- a/docs/source/snippets/train/api/default.txt +++ b/docs/source/snippets/train/api/default.txt @@ -1,10 +1,10 @@ # Import the required modules -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import EfficientAd # Initialize the datamodule, model and engine -datamodule = MVTec(train_batch_size=1) +datamodule = MVTecAD(train_batch_size=1) model = EfficientAd() engine = Engine(max_epochs=5) diff --git a/docs/source/snippets/train/cli/default.txt b/docs/source/snippets/train/cli/default.txt index 1990dbf97e..3cef2d4cc9 100644 --- a/docs/source/snippets/train/cli/default.txt +++ b/docs/source/snippets/train/cli/default.txt @@ -2,10 +2,10 @@ anomalib train -h # Train by using the default values. -anomalib train --model EfficientAd --data anomalib.data.MVTec --data.train_batch_size 1 +anomalib train --model EfficientAd --data anomalib.data.MVTecAD --data.train_batch_size 1 # Train by overriding arguments. -anomalib train --model EfficientAd --data anomalib.data.MVTec --data.train_batch_size 1 --data.category transistor +anomalib train --model EfficientAd --data anomalib.data.MVTecAD --data.train_batch_size 1 --data.category transistor # Train by using a config file. 
anomalib train --config diff --git a/examples/api/01_getting_started/basic_training.py b/examples/api/01_getting_started/basic_training.py index 51ca2d70cb..fddc0b998d 100644 --- a/examples/api/01_getting_started/basic_training.py +++ b/examples/api/01_getting_started/basic_training.py @@ -8,13 +8,13 @@ """ # 1. Import required modules -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import EfficientAd # 2. Create a dataset -# MVTec is a popular dataset for anomaly detection -datamodule = MVTec( +# MVTecAD is a popular dataset for anomaly detection +datamodule = MVTecAD( root="./datasets/MVTec", # Path to download/store the dataset category="bottle", # MVTec category to use train_batch_size=32, # Number of images per training batch diff --git a/examples/api/02_data/mvtec.py b/examples/api/02_data/mvtec.py index 56ac8a4dfd..39e4a86d13 100644 --- a/examples/api/02_data/mvtec.py +++ b/examples/api/02_data/mvtec.py @@ -1,24 +1,24 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -"""Example showing how to use the MVTec dataset with Anomalib. +"""Example showing how to use the MVTecAD dataset with Anomalib. -MVTec is a widely-used dataset for anomaly detection, containing multiple +MVTecAD is a widely-used dataset for anomaly detection, containing multiple categories of industrial objects with various types of defects. """ -from anomalib.data import MVTec +from anomalib.data import MVTecAD # 1. Basic Usage # Load a specific category with default settings -datamodule = MVTec( +datamodule = MVTecAD( root="./datasets/MVTec", category="bottle", ) # 2. Advanced Configuration # Customize data loading and preprocessing -datamodule = MVTec( +datamodule = MVTecAD( root="./datasets/MVTec", category="bottle", train_batch_size=32, @@ -31,7 +31,7 @@ # 3. 
Using Multiple Categories # Train on multiple categories (if supported by the model) for category in ["bottle", "cable", "capsule"]: - category_data = MVTec( + category_data = MVTecAD( root="./datasets/MVTec", category=category, ) diff --git a/examples/api/03_models/efficient_ad.py b/examples/api/03_models/efficient_ad.py index 338133d920..4cf7360619 100644 --- a/examples/api/03_models/efficient_ad.py +++ b/examples/api/03_models/efficient_ad.py @@ -7,7 +7,7 @@ particularly well-suited for industrial inspection tasks. """ -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import EfficientAd @@ -25,8 +25,8 @@ # 3. Training Pipeline # Set up the complete training pipeline -datamodule = MVTec( - root="./datasets/MVTec", +datamodule = MVTecAD( + root="./datasets/MVTecAD", category="bottle", train_batch_size=32, ) diff --git a/examples/api/03_models/padim.py b/examples/api/03_models/padim.py index c61edb5111..3fdf8e24fd 100644 --- a/examples/api/03_models/padim.py +++ b/examples/api/03_models/padim.py @@ -7,7 +7,7 @@ and multivariate Gaussian modeling for anomaly detection. """ -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import Padim @@ -26,7 +26,7 @@ # 3. Training Pipeline # Set up the complete training pipeline -datamodule = MVTec( +datamodule = MVTecAD( root="./datasets/MVTec", category="bottle", train_batch_size=32, diff --git a/examples/api/03_models/patchcore.py b/examples/api/03_models/patchcore.py index 9acf435402..2e23d3bd73 100644 --- a/examples/api/03_models/patchcore.py +++ b/examples/api/03_models/patchcore.py @@ -7,7 +7,7 @@ to extract and store patch features for anomaly detection. """ -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import Patchcore @@ -26,7 +26,7 @@ # 3. 
Training Pipeline # Set up the complete training pipeline -datamodule = MVTec( +datamodule = MVTecAD( root="./datasets/MVTec", category="bottle", train_batch_size=32, diff --git a/examples/api/04_advanced/loggers.py b/examples/api/04_advanced/loggers.py index ad2a81a426..26bc63498b 100644 --- a/examples/api/04_advanced/loggers.py +++ b/examples/api/04_advanced/loggers.py @@ -9,7 +9,7 @@ from pathlib import Path -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.loggers import AnomalibMLFlowLogger, AnomalibTensorBoardLogger, AnomalibWandbLogger from anomalib.models import Patchcore @@ -54,7 +54,7 @@ # 5. Complete Training Example with Logging model = Patchcore() -datamodule = MVTec( +datamodule = MVTecAD( root=Path("./datasets/MVTec"), category="bottle", ) diff --git a/examples/api/05_pipelines/complete_pipeline.py b/examples/api/05_pipelines/complete_pipeline.py index 001f7800fb..93aa74a1b3 100644 --- a/examples/api/05_pipelines/complete_pipeline.py +++ b/examples/api/05_pipelines/complete_pipeline.py @@ -11,7 +11,7 @@ from pathlib import Path -from anomalib.data import MVTec, PredictDataset +from anomalib.data import MVTecAD, PredictDataset from anomalib.deploy import ExportType from anomalib.engine import Engine from anomalib.models import Patchcore @@ -22,7 +22,7 @@ # Initialize components model = Patchcore() -datamodule = MVTec( +datamodule = MVTecAD( root=Path("./datasets/MVTec"), category="bottle", train_batch_size=32, diff --git a/examples/cli/01_getting_started/basic_training.sh b/examples/cli/01_getting_started/basic_training.sh index 64d53d0916..d1a346941f 100644 --- a/examples/cli/01_getting_started/basic_training.sh +++ b/examples/cli/01_getting_started/basic_training.sh @@ -20,7 +20,7 @@ anomalib train --model efficient_ad \ --trainer.max_epochs 10 # 3. Using a Different Dataset -# Train on a specific category of MVTec dataset -echo -e "\nTraining on MVTec bottle category..." 
+# Train on a specific category of MVTecAD dataset +echo -e "\nTraining on MVTecAD bottle category..." anomalib train --model efficient_ad \ --data.category bottle diff --git a/examples/cli/02_data/mvtec.sh b/examples/cli/02_data/mvtec.sh index 61c1e51c76..4afc4da78e 100644 --- a/examples/cli/02_data/mvtec.sh +++ b/examples/cli/02_data/mvtec.sh @@ -3,13 +3,13 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -# Using MVTec Dataset with Anomalib CLI +# Using MVTecAD Dataset with Anomalib CLI # ----------------------------------- -# This example shows different ways to use the MVTec dataset. +# This example shows different ways to use the MVTecAD dataset. # 1. Basic Usage -# Train on a specific MVTec category -echo "Training on MVTec bottle category..." +# Train on a specific MVTecAD category +echo "Training on MVTecAD bottle category..." anomalib train \ --model efficient_ad \ --data.category bottle @@ -28,7 +28,7 @@ anomalib train \ # 3. Training Multiple Categories # Train separate models for different categories -echo -e "\nTraining on multiple MVTec categories..." +echo -e "\nTraining on multiple MVTecAD categories..." for category in "bottle" "cable" "capsule"; do echo "Training on category: $category" anomalib train \ diff --git a/examples/cli/04_advanced/custom_pipeline.sh b/examples/cli/04_advanced/custom_pipeline.sh index 81515113b4..a6324200d1 100644 --- a/examples/cli/04_advanced/custom_pipeline.sh +++ b/examples/cli/04_advanced/custom_pipeline.sh @@ -12,7 +12,7 @@ echo "Training with custom pipeline components..." anomalib train \ --model patchcore \ - --data MVTec \ + --data MVTecAD \ --data.category bottle \ --model.backbone resnet18 \ --model.layers layer2 layer3 \ @@ -29,7 +29,7 @@ anomalib train \ echo -e "\nTraining with advanced settings..." 
anomalib train \ --model patchcore \ - --data MVTec \ + --data MVTecAD \ --trainer.max_epochs 1 \ --trainer.accelerator gpu \ --trainer.devices 1 \ @@ -66,7 +66,7 @@ for backbone in "resnet18" "wide_resnet50_2"; do echo "Training with backbone: $backbone, layers: ${layers[*]}" anomalib train \ --model patchcore \ - --data MVTec \ + --data MVTecAD \ --model.backbone "$backbone" \ --model.layers "${layers[@]}" \ --trainer.default_root_dir "results/search/${backbone}_${layer_combo}" diff --git a/examples/configs/data/mvtec.yaml b/examples/configs/data/mvtec.yaml index 78c8a5c01c..21f5d23b99 100644 --- a/examples/configs/data/mvtec.yaml +++ b/examples/configs/data/mvtec.yaml @@ -1,4 +1,4 @@ -class_path: anomalib.data.MVTec +class_path: anomalib.data.MVTecAD init_args: root: ./datasets/MVTec category: bottle diff --git a/examples/notebooks/000_getting_started/001_getting_started.ipynb b/examples/notebooks/000_getting_started/001_getting_started.ipynb index 958b3aa686..325934d1d3 100644 --- a/examples/notebooks/000_getting_started/001_getting_started.ipynb +++ b/examples/notebooks/000_getting_started/001_getting_started.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a8fb075ef8304fd22c5578a4b79a20955d16615632089c69fc098ab8b1e5f14 -size 18715 +oid sha256:b111d020f79198df6f1769c977cef240d50567bb6a01bc0efe5750c5eff14185 +size 18724 diff --git a/examples/notebooks/100_datamodules/102_mvtec.ipynb b/examples/notebooks/100_datamodules/102_mvtec.ipynb index 8ed8115fe0..7f5f229a90 100644 --- a/examples/notebooks/100_datamodules/102_mvtec.ipynb +++ b/examples/notebooks/100_datamodules/102_mvtec.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:7f58d11023d872fd6aecd0a9b99dc963e225bf34667d41330a4686bf561b9b73 -size 7857 +oid sha256:425b1c0edd05d919b5040d3e7801b99643652c3f026c6c56a47bab65f5f2e7a9 +size 7869 diff --git a/examples/notebooks/100_datamodules/README.md b/examples/notebooks/100_datamodules/README.md index 
5e6d07f44c..583737c844 100644 --- a/examples/notebooks/100_datamodules/README.md +++ b/examples/notebooks/100_datamodules/README.md @@ -31,7 +31,7 @@ anomalib │   ├── inference.py │ │ ├── InferenceDataset │ │ mvtec.py -│ │ ├── MVTecDataset +│ │ ├── MVTecADDataset └── └── └── MVTec ``` diff --git a/examples/notebooks/200_models/201_fastflow.ipynb b/examples/notebooks/200_models/201_fastflow.ipynb index ef342d3c93..d28ab51ae9 100644 --- a/examples/notebooks/200_models/201_fastflow.ipynb +++ b/examples/notebooks/200_models/201_fastflow.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:4a2a6abc8f27064ed4741dc328085dd231c2b7035c6111c16d0dc75329c6426a -size 13086 +oid sha256:9a064c39b1a9e040a37c9b6a4333fcca65ad39ad1bf69061ace4140b94fa1eb4 +size 13090 diff --git a/examples/notebooks/600_loggers/601_mlflow_logging.ipynb b/examples/notebooks/600_loggers/601_mlflow_logging.ipynb index 563640a3e9..f0dadadefc 100644 --- a/examples/notebooks/600_loggers/601_mlflow_logging.ipynb +++ b/examples/notebooks/600_loggers/601_mlflow_logging.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:01942f28d6ce2a26d88c61ffc8ac563830b4ee5c1d18b97d1af21ee517518ac8 -size 8379 +oid sha256:074a352435337f915dcae80bd1d203bd3f9c90315fec5022a08b688a1ba93886 +size 8385 diff --git a/examples/notebooks/700_metrics/701a_aupimo.ipynb b/examples/notebooks/700_metrics/701a_aupimo.ipynb index ff7e496b17..e8ebfda2b4 100644 --- a/examples/notebooks/700_metrics/701a_aupimo.ipynb +++ b/examples/notebooks/700_metrics/701a_aupimo.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:28509cb5b0bb871c0e905b55c392dad86fb2d261ab7acdd4a955bd38ace9ca18 -size 11019 +oid sha256:bd5df1910fdb0b54a4200d6aba4be95ed303659d52c708c00c871241f5968be4 +size 11023 diff --git a/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb b/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb index
99685bc1a4..c65f6a7e4c 100644 --- a/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb +++ b/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:498d7e4fec8b38bc5b902faa3349039e61de602f07bdab9fc6a3d071dcd684ac -size 112175 +oid sha256:8063f7f19bb53d799edc49e5de4495b0369ebec658613ce71c7dd163aa9a41e4 +size 112179 diff --git a/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb b/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb index 0c97783e76..e02f4551c1 100644 --- a/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb +++ b/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:eae9da4e33a50673ccc1d47c56c4a4f00c85f5b7076bd77fad6ddcd1a4ce77bf -size 230521 +oid sha256:0bcbdc7140626d330363b82abf1af0bf4e3fffe37a1746036ff842b92158a63c +size 230525 diff --git a/examples/notebooks/README.md b/examples/notebooks/README.md index 8c9e998cfb..361ce2a9de 100644 --- a/examples/notebooks/README.md +++ b/examples/notebooks/README.md @@ -30,7 +30,7 @@ To install Python, Git and other required tools, [OpenVINO Notebooks](https://gi | Notebook | GitHub | Colab | | -------- | ---------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | | BTech | [101_btech](100_datamodules/101_btech.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/examples/notebooks/100_datamodules/101_btech.ipynb) | -| MVTec | [102_mvtec](100_datamodules/102_mvtec.ipynb) | [![Open In 
Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/examples/notebooks/100_datamodules/102_mvtec.ipynb) | +| MVTecAD | [102_mvtec](100_datamodules/102_mvtec.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/examples/notebooks/100_datamodules/102_mvtec.ipynb) | | Folder | [103_folder](100_datamodules/103_folder.ipynb) | [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/openvinotoolkit/anomalib/blob/main/examples/notebooks/100_datamodules/103_folder.ipynb) | ## 2. Models diff --git a/src/anomalib/cli/cli.py b/src/anomalib/cli/cli.py index 88a9bf9fc7..c4cd051212 100644 --- a/src/anomalib/cli/cli.py +++ b/src/anomalib/cli/cli.py @@ -56,12 +56,12 @@ class AnomalibCLI: Run from command line: >>> import sys - >>> sys.argv = ["anomalib", "train", "--model", "Padim", "--data", "MVTec"] + >>> sys.argv = ["anomalib", "train", "--model", "Padim", "--data", "MVTecAD"] Run programmatically: >>> from anomalib.cli import AnomalibCLI - >>> cli = AnomalibCLI(["train", "--model", "Padim", "--data", "MVTec"], run=False) + >>> cli = AnomalibCLI(["train", "--model", "Padim", "--data", "MVTecAD"], run=False) Note: The CLI supports both YAML and JSON configuration files. Configuration can be diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py index 132e2a7954..6c26c0a3f3 100644 --- a/src/anomalib/data/__init__.py +++ b/src/anomalib/data/__init__.py @@ -9,19 +9,12 @@ - Helper functions for data loading and validation Example: - >>> from anomalib.data import get_datamodule - >>> from omegaconf import DictConfig - >>> config = DictConfig({ - ... "data": { - ... "class_path": "MVTec", - ... "init_args": { - ... "root": "./datasets/MVTec", - ... "category": "bottle", - ... "image_size": (256, 256) - ... 
} - ... } - ... }) - >>> datamodule = get_datamodule(config) + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD( + ... root="./datasets/MVTec", + ... category="bottle", + ... image_size=(256, 256) + ... ) """ # Copyright (C) 2022-2025 Intel Corporation @@ -56,13 +49,20 @@ # Datamodules from .datamodules.base import AnomalibDataModule from .datamodules.depth import DepthDataFormat, Folder3D, MVTec3D -from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTec, Visa +from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTec, MVTecAD, Visa from .datamodules.video import Avenue, ShanghaiTech, UCSDped, VideoDataFormat # Datasets from .datasets import AnomalibDataset from .datasets.depth import Folder3DDataset, MVTec3DDataset -from .datasets.image import BTechDataset, DatumaroDataset, FolderDataset, KolektorDataset, MVTecDataset, VisaDataset +from .datasets.image import ( + BTechDataset, + DatumaroDataset, + FolderDataset, + KolektorDataset, + MVTecADDataset, + VisaDataset, +) from .datasets.video import AvenueDataset, ShanghaiTechDataset, UCSDpedDataset from .predict import PredictDataset @@ -98,7 +98,7 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule >>> from omegaconf import DictConfig >>> config = DictConfig({ ... "data": { - ... "class_path": "MVTec", + ... "class_path": "MVTecAD", ... "init_args": {"root": "./datasets/MVTec"} ... } ... 
}) @@ -127,51 +127,53 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule __all__ = [ - # Anomalib dataclasses - "DatasetItem", + # Base Classes + "AnomalibDataModule", + "AnomalibDataset", + # Data Classes "Batch", - "InferenceBatch", - "ImageItem", - "ImageBatch", - "VideoItem", - "VideoBatch", - "DepthItem", + "DatasetItem", "DepthBatch", - "NumpyImageItem", + "DepthItem", + "ImageBatch", + "ImageItem", + "InferenceBatch", "NumpyImageBatch", - "NumpyVideoItem", + "NumpyImageItem", "NumpyVideoBatch", - # Anomalib datasets - "AnomalibDataset", + "NumpyVideoItem", + "VideoBatch", + "VideoItem", + # Depth + "DepthDataFormat", + "Folder3D", "Folder3DDataset", + "MVTec3D", "MVTec3DDataset", + # Image + "BTech", "BTechDataset", + "Datumaro", "DatumaroDataset", + "Folder", "FolderDataset", + "ImageDataFormat", + "Kolektor", "KolektorDataset", - "MVTecDataset", + "MVTecAD", + "MVTec", + "MVTecADDataset", + "Visa", "VisaDataset", + # Video + "Avenue", "AvenueDataset", + "ShanghaiTech", "ShanghaiTechDataset", + "UCSDped", "UCSDpedDataset", - "PredictDataset", - # Anomalib datamodules - "AnomalibDataModule", - "DepthDataFormat", - "ImageDataFormat", "VideoDataFormat", - "get_datamodule", - "BTech", - "Datumaro", - "Folder", - "Folder3D", - "Kolektor", - "MVTec", - "MVTec3D", - "Avenue", - "UCSDped", - "ShanghaiTech", - "Visa", - "LabelName", + # Predict "PredictDataset", + "get_datamodule", ] diff --git a/src/anomalib/data/datamodules/image/__init__.py b/src/anomalib/data/datamodules/image/__init__.py index deb98863ed..66e87c7852 100644 --- a/src/anomalib/data/datamodules/image/__init__.py +++ b/src/anomalib/data/datamodules/image/__init__.py @@ -7,20 +7,20 @@ - ``Datumaro``: Dataset in Datumaro format (Intel Geti™ export) - ``Folder``: Custom folder structure with normal/abnormal images - ``Kolektor``: Kolektor Surface-Defect Dataset -- ``MVTec``: MVTec Anomaly Detection Dataset +- ``MVTecAD``: MVTec Anomaly Detection Dataset - ``Visa``:
Visual Inspection for Steel Anomaly Dataset Example: - Load the MVTec dataset:: + Load the MVTec AD dataset:: - >>> from anomalib.data import MVTec - >>> datamodule = MVTec( - ... root="./datasets/MVTec", + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD( + ... root="./datasets/MVTecAD", ... category="bottle" ... ) """ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from enum import Enum @@ -29,7 +29,7 @@ from .datumaro import Datumaro from .folder import Folder from .kolektor import Kolektor -from .mvtec import MVTec +from .mvtec import MVTec, MVTecAD # MVTec is an alias for backward compatibility from .visa import Visa @@ -43,7 +43,7 @@ class ImageDataFormat(str, Enum): - ``FOLDER``: Custom folder structure - ``FOLDER_3D``: Custom folder structure for 3D images - ``KOLEKTOR``: Kolektor Surface-Defect Dataset - - ``MVTEC``: MVTec AD Dataset + - ``MVTEC_AD``: MVTec AD Dataset - ``MVTEC_3D``: MVTec 3D AD Dataset - ``VISA``: Visual Inspection for Steel Anomaly Dataset """ @@ -53,9 +53,18 @@ class ImageDataFormat(str, Enum): FOLDER = "folder" FOLDER_3D = "folder_3d" KOLEKTOR = "kolektor" - MVTEC = "mvtec" + MVTEC_AD = "mvtec_ad" + MVTEC = "mvtec" # Keep the value same for backward compatibility MVTEC_3D = "mvtec_3d" VISA = "visa" -__all__ = ["BTech", "Datumaro", "Folder", "Kolektor", "MVTec", "Visa"] +__all__ = [ + "BTech", + "Datumaro", + "Folder", + "Kolektor", + "MVTecAD", + "MVTec", # Include both for backward compatibility + "Visa", +] diff --git a/src/anomalib/data/datamodules/image/mvtec.py b/src/anomalib/data/datamodules/image/mvtec.py index 9c6da2e104..293fce1966 100644 --- a/src/anomalib/data/datamodules/image/mvtec.py +++ b/src/anomalib/data/datamodules/image/mvtec.py @@ -5,11 +5,11 @@ automatically. Example: - Create a MVTec datamodule:: + Create a MVTec AD datamodule:: - >>> from anomalib.data import MVTec - >>> datamodule = MVTec( - ... 
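root="./datasets/MVTecAD", ... category="bottle" ... ) """ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from enum import Enum @@ -29,7 +29,7 @@ from .datumaro import Datumaro from .folder import Folder from .kolektor import Kolektor -from .mvtec import MVTec +from .mvtec import MVTec, MVTecAD # MVTec is an alias for backward compatibility from .visa import Visa @@ -43,7 +43,7 @@ class ImageDataFormat(str, Enum): - ``FOLDER``: Custom folder structure - ``FOLDER_3D``: Custom folder structure for 3D images - ``KOLEKTOR``: Kolektor Surface-Defect Dataset - - ``MVTEC``: MVTec AD Dataset + - ``MVTEC_AD``: MVTec AD Dataset - ``MVTEC_3D``: MVTec 3D AD Dataset - ``VISA``: Visual Inspection for Steel Anomaly Dataset """ @@ -53,9 +53,18 @@ class ImageDataFormat(str, Enum): FOLDER = "folder" FOLDER_3D = "folder_3d" KOLEKTOR = "kolektor" - MVTEC = "mvtec" + MVTEC_AD = "mvtec_ad" + MVTEC = "mvtec" # Keep the value same for backward compatibility MVTEC_3D = "mvtec_3d" VISA = "visa" -__all__ = ["BTech", "Datumaro", "Folder", "Kolektor", "MVTec", "Visa"] +__all__ = [ + "BTech", + "Datumaro", + "Folder", + "Kolektor", + "MVTecAD", + "MVTec", # Include both for backward compatibility + "Visa", +] diff --git a/src/anomalib/data/datamodules/image/mvtec.py b/src/anomalib/data/datamodules/image/mvtec.py index 9c6da2e104..293fce1966 100644 --- a/src/anomalib/data/datamodules/image/mvtec.py +++ b/src/anomalib/data/datamodules/image/mvtec.py @@ -5,11 +5,11 @@ automatically. Example: - Create a MVTec datamodule:: + Create a MVTec AD datamodule:: - >>> from anomalib.data import MVTec - >>> datamodule = MVTec( - ... root="./datasets/mvtec", + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD( + ... root="./datasets/MVTecAD", ... category="bottle" ... ) @@ -18,7 +18,7 @@ format when first used. The directory structure after preparation will be:: datasets/ - └── mvtec/ + └── MVTecAD/ ├── bottle/ ├── cable/ └── ... 
@@ -51,27 +51,27 @@ from torchvision.transforms.v2 import Transform from anomalib.data.datamodules.base.image import AnomalibDataModule -from anomalib.data.datasets.image.mvtec import MVTecDataset +from anomalib.data.datasets.image.mvtec_ad import MVTecADDataset from anomalib.data.utils import DownloadInfo, Split, TestSplitMode, ValSplitMode, download_and_extract logger = logging.getLogger(__name__) DOWNLOAD_INFO = DownloadInfo( - name="mvtec", + name="mvtec_ad", url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/" "download/420938113-1629952094/mvtec_anomaly_detection.tar.xz", hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d", ) -class MVTec(AnomalibDataModule): - """MVTec Datamodule. +class MVTecAD(AnomalibDataModule): + """MVTec AD Datamodule. Args: root (Path | str): Path to the root of the dataset. - Defaults to ``"./datasets/MVTec"``. - category (str): Category of the MVTec dataset (e.g. ``"bottle"`` or + Defaults to ``"./datasets/MVTecAD"``. + category (str): Category of the MVTec AD dataset (e.g. ``"bottle"`` or ``"cable"``). Defaults to ``"bottle"``. train_batch_size (int, optional): Training batch size. Defaults to ``32``. @@ -99,9 +99,9 @@ class MVTec(AnomalibDataModule): Defaults to ``None``.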
Example: - Create MVTec datamodule with default settings:: + Create MVTec AD datamodule with default settings:: - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> datamodule.setup() >>> i, data = next(enumerate(datamodule.train_dataloader())) >>> data.keys() @@ -112,18 +112,18 @@ class MVTec(AnomalibDataModule): Change the category:: - >>> datamodule = MVTec(category="cable") + >>> datamodule = MVTecAD(category="cable") Create validation set from test data:: - >>> datamodule = MVTec( + >>> datamodule = MVTecAD( ... val_split_mode=ValSplitMode.FROM_TEST, ... val_split_ratio=0.1 ... ) Create synthetic validation set:: - >>> datamodule = MVTec( + >>> datamodule = MVTecAD( ... val_split_mode=ValSplitMode.SYNTHETIC, ... val_split_ratio=0.2 ... ) @@ -131,7 +131,7 @@ class MVTec(AnomalibDataModule): def __init__( self, - root: Path | str = "./datasets/MVTec", + root: Path | str = "./datasets/MVTecAD", category: str = "bottle", train_batch_size: int = 32, eval_batch_size: int = 32, @@ -177,12 +177,12 @@ def _setup(self, _stage: str | None = None) -> None: is usually extracted from the test set, and the test set must therefore be created as early as the `fit` stage. """ - self.train_data = MVTecDataset( + self.train_data = MVTecADDataset( split=Split.TRAIN, root=self.root, category=self.category, ) - self.test_data = MVTecDataset( + self.test_data = MVTecADDataset( split=Split.TEST, root=self.root, category=self.category, @@ -198,8 +198,8 @@ def prepare_data(self) -> None: Example: Assume the dataset is not available on the file system:: - >>> datamodule = MVTec( - ... root="./datasets/MVTec", + >>> datamodule = MVTecAD( + ... root="./datasets/MVTecAD", ... category="bottle" ... ) >>> datamodule.prepare_data() @@ -207,7 +207,7 @@ def prepare_data(self) -> None: Directory structure after download:: datasets/ - └── MVTec/ + └── MVTecAD/ ├── bottle/ ├── cable/ └── ...
@@ -216,3 +216,21 @@ def prepare_data(self) -> None: logger.info("Found the dataset.") else: download_and_extract(self.root, DOWNLOAD_INFO) + + +class MVTec(MVTecAD): + """MVTec datamodule class (Deprecated). + + This class is deprecated and will be removed in a future version. + Please use MVTecAD instead. + """ + + def __init__(self, *args, **kwargs) -> None: + import warnings + + warnings.warn( + "MVTec is deprecated and will be removed in a future version. Please use MVTecAD instead.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(*args, **kwargs) diff --git a/src/anomalib/data/datamodules/image/mvtec_ad.py b/src/anomalib/data/datamodules/image/mvtec_ad.py new file mode 100644 index 0000000000..293fce1966 --- /dev/null +++ b/src/anomalib/data/datamodules/image/mvtec_ad.py @@ -0,0 +1,236 @@ +"""MVTec AD Data Module. + +This module provides a PyTorch Lightning DataModule for the MVTec AD dataset. If +the dataset is not available locally, it will be downloaded and extracted +automatically. + +Example: + Create a MVTec AD datamodule:: + + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD( + ... root="./datasets/MVTecAD", + ... category="bottle" + ... ) + +Notes: + The dataset will be automatically downloaded and converted to the required + format when first used. The directory structure after preparation will be:: + + datasets/ + └── MVTecAD/ + ├── bottle/ + ├── cable/ + └── ... + +License: + MVTec AD dataset is released under the Creative Commons + Attribution-NonCommercial-ShareAlike 4.0 International License + (CC BY-NC-SA 4.0). + https://creativecommons.org/licenses/by-nc-sa/4.0/ + +Reference: + Paul Bergmann, Kilian Batzner, Michael Fauser, David Sattlegger, + Carsten Steger: The MVTec Anomaly Detection Dataset: A Comprehensive + Real-World Dataset for Unsupervised Anomaly Detection; in: International + Journal of Computer Vision 129(4):1038-1059, 2021, + DOI: 10.1007/s11263-020-01400-4.
+ + Paul Bergmann, Michael Fauser, David Sattlegger, Carsten Steger: MVTec AD — + A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection; + in: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), + 9584-9592, 2019, DOI: 10.1109/CVPR.2019.00982. +""" + +# Copyright (C) 2022-2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import logging +from pathlib import Path + +from torchvision.transforms.v2 import Transform + +from anomalib.data.datamodules.base.image import AnomalibDataModule +from anomalib.data.datasets.image.mvtec_ad import MVTecADDataset +from anomalib.data.utils import DownloadInfo, Split, TestSplitMode, ValSplitMode, download_and_extract + +logger = logging.getLogger(__name__) + + +DOWNLOAD_INFO = DownloadInfo( + name="mvtec_ad", + url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/" + "download/420938113-1629952094/mvtec_anomaly_detection.tar.xz", + hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d", +) + + +class MVTecAD(AnomalibDataModule): + """MVTec AD Datamodule. + + Args: + root (Path | str): Path to the root of the dataset. + Defaults to ``"./datasets/MVTecAD"``. + category (str): Category of the MVTec AD dataset (e.g. ``"bottle"`` or + ``"cable"``). Defaults to ``"bottle"``. + train_batch_size (int, optional): Training batch size. + Defaults to ``32``. + eval_batch_size (int, optional): Test batch size. + Defaults to ``32``. + num_workers (int, optional): Number of workers. + Defaults to ``8``. + train_augmentations (Transform | None): Augmentations to apply to the training images + Defaults to ``None``. + val_augmentations (Transform | None): Augmentations to apply to the validation images. + Defaults to ``None``. + test_augmentations (Transform | None): Augmentations to apply to the test images. + Defaults to ``None``. + augmentations (Transform | None): General augmentations to apply if stage-specific + augmentations are not provided.
+ test_split_mode (TestSplitMode): Method to create test set. + Defaults to ``TestSplitMode.FROM_DIR``. + test_split_ratio (float): Fraction of data to use for testing. + Defaults to ``0.2``. + val_split_mode (ValSplitMode): Method to create validation set. + Defaults to ``ValSplitMode.SAME_AS_TEST``. + val_split_ratio (float): Fraction of data to use for validation. + Defaults to ``0.5``. + seed (int | None, optional): Seed for reproducibility. + Defaults to ``None``. + + Example: + Create MVTec AD datamodule with default settings:: + + >>> datamodule = MVTecAD() + >>> datamodule.setup() + >>> i, data = next(enumerate(datamodule.train_dataloader())) + >>> data.keys() + dict_keys(['image_path', 'label', 'image', 'mask_path', 'mask']) + + >>> data["image"].shape + torch.Size([32, 3, 256, 256]) + + Change the category:: + + >>> datamodule = MVTecAD(category="cable") + + Create validation set from test data:: + + >>> datamodule = MVTecAD( + ... val_split_mode=ValSplitMode.FROM_TEST, + ... val_split_ratio=0.1 + ... ) + + Create synthetic validation set:: + + >>> datamodule = MVTecAD( + ... val_split_mode=ValSplitMode.SYNTHETIC, + ... val_split_ratio=0.2 + ... 
) + """ + + def __init__( + self, + root: Path | str = "./datasets/MVTecAD", + category: str = "bottle", + train_batch_size: int = 32, + eval_batch_size: int = 32, + num_workers: int = 8, + train_augmentations: Transform | None = None, + val_augmentations: Transform | None = None, + test_augmentations: Transform | None = None, + augmentations: Transform | None = None, + test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR, + test_split_ratio: float = 0.2, + val_split_mode: ValSplitMode | str = ValSplitMode.SAME_AS_TEST, + val_split_ratio: float = 0.5, + seed: int | None = None, + ) -> None: + super().__init__( + train_batch_size=train_batch_size, + eval_batch_size=eval_batch_size, + num_workers=num_workers, + train_augmentations=train_augmentations, + val_augmentations=val_augmentations, + test_augmentations=test_augmentations, + augmentations=augmentations, + test_split_mode=test_split_mode, + test_split_ratio=test_split_ratio, + val_split_mode=val_split_mode, + val_split_ratio=val_split_ratio, + seed=seed, + ) + + self.root = Path(root) + self.category = category + + def _setup(self, _stage: str | None = None) -> None: + """Set up the datasets and perform dynamic subset splitting. + + This method may be overridden in subclass for custom splitting behaviour. + + Note: + The stage argument is not used here. This is because, for a given + instance of an AnomalibDataModule subclass, all three subsets are + created at the first call of setup(). This is to accommodate the + subset splitting behaviour of anomaly tasks, where the validation set + is usually extracted from the test set, and the test set must + therefore be created as early as the `fit` stage. + """ + self.train_data = MVTecADDataset( + split=Split.TRAIN, + root=self.root, + category=self.category, + ) + self.test_data = MVTecADDataset( + split=Split.TEST, + root=self.root, + category=self.category, + ) + + def prepare_data(self) -> None: + """Download the dataset if not available. 
+ + This method checks if the specified dataset is available in the file + system. If not, it downloads and extracts the dataset into the + appropriate directory. + + Example: + Assume the dataset is not available on the file system:: + + >>> datamodule = MVTecAD( + ... root="./datasets/MVTecAD", + ... category="bottle" + ... ) + >>> datamodule.prepare_data() + + Directory structure after download:: + + datasets/ + └── MVTecAD/ + ├── bottle/ + ├── cable/ + └── ... + """ + if (self.root / self.category).is_dir(): + logger.info("Found the dataset.") + else: + download_and_extract(self.root, DOWNLOAD_INFO) + + +class MVTec(MVTecAD): + """MVTec datamodule class (Deprecated). + + This class is deprecated and will be removed in a future version. + Please use MVTecAD instead. + """ + + def __init__(self, *args, **kwargs) -> None: + import warnings + + warnings.warn( + "MVTec is deprecated and will be removed in a future version. Please use MVTecAD instead.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(*args, **kwargs) diff --git a/src/anomalib/data/datasets/__init__.py b/src/anomalib/data/datasets/__init__.py index 7011b7373a..e49d272f48 100644 --- a/src/anomalib/data/datasets/__init__.py +++ b/src/anomalib/data/datasets/__init__.py @@ -16,7 +16,7 @@ - ``DatumaroDataset``: Dataset in Datumaro format (Intel Geti™ export) - ``FolderDataset``: Custom dataset from folder structure - ``KolektorDataset``: Kolektor surface defect dataset - - ``MVTecDataset``: MVTec AD dataset with industrial objects + - ``MVTecADDataset``: MVTec AD dataset with industrial objects - ``VisaDataset``: Visual Inspection of Surface Anomalies dataset Video Datasets: @@ -25,8 +25,8 @@ - ``UCSDpedDataset``: UCSD Pedestrian dataset for anomaly detection Example: - >>> from anomalib.data.datasets import MVTecDataset - >>> dataset = MVTecDataset( + >>> from anomalib.data.datasets import MVTecADDataset + >>> dataset = MVTecADDataset( ... root="./datasets/MVTec", ...
category="bottle", ... split="train" @@ -38,7 +38,7 @@ from .base import AnomalibDataset, AnomalibDepthDataset, AnomalibVideoDataset from .depth import Folder3DDataset, MVTec3DDataset -from .image import BTechDataset, DatumaroDataset, FolderDataset, KolektorDataset, MVTecDataset, VisaDataset +from .image import BTechDataset, DatumaroDataset, FolderDataset, KolektorDataset, MVTecADDataset, VisaDataset from .video import AvenueDataset, ShanghaiTechDataset, UCSDpedDataset __all__ = [ @@ -54,7 +54,7 @@ "DatumaroDataset", "FolderDataset", "KolektorDataset", - "MVTecDataset", + "MVTecADDataset", "VisaDataset", # Video "AvenueDataset", diff --git a/src/anomalib/data/datasets/image/__init__.py b/src/anomalib/data/datasets/image/__init__.py index e319b8a36f..89832754a4 100644 --- a/src/anomalib/data/datasets/image/__init__.py +++ b/src/anomalib/data/datasets/image/__init__.py @@ -7,19 +7,19 @@ - ``DatumaroDataset``: Dataset in Datumaro format (Intel Geti™ export) - ``FolderDataset``: Custom dataset from folder structure - ``KolektorDataset``: Kolektor surface defect dataset -- ``MVTecDataset``: MVTec AD dataset with industrial objects +- ``MVTecADDataset``: MVTec AD dataset with industrial objects - ``VisaDataset``: Visual Inspection of Surface Anomalies dataset Example: - >>> from anomalib.data.datasets import MVTecDataset - >>> dataset = MVTecDataset( + >>> from anomalib.data.datasets import MVTecADDataset + >>> dataset = MVTecADDataset( ... root="./datasets/MVTec", ... category="bottle", ... split="train" ...
) """ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2024-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from .btech import BTechDataset @@ -27,6 +27,7 @@ from .folder import FolderDataset from .kolektor import KolektorDataset from .mvtec import MVTecDataset +from .mvtec_ad import MVTecADDataset from .visa import VisaDataset __all__ = [ @@ -35,5 +36,6 @@ "FolderDataset", "KolektorDataset", "MVTecDataset", + "MVTecADDataset", "VisaDataset", ] diff --git a/src/anomalib/data/datasets/image/mvtec.py b/src/anomalib/data/datasets/image/mvtec.py index d651661ca0..2d41931551 100644 --- a/src/anomalib/data/datasets/image/mvtec.py +++ b/src/anomalib/data/datasets/image/mvtec.py @@ -57,8 +57,8 @@ ) -class MVTecDataset(AnomalibDataset): - """MVTec dataset class. +class MVTecADDataset(AnomalibDataset): + """MVTec AD dataset class. Dataset class for loading and processing MVTec AD dataset images. Supports both classification and segmentation tasks. @@ -75,8 +75,8 @@ class MVTecDataset(AnomalibDataset): Example: >>> from pathlib import Path - >>> from anomalib.data.datasets import MVTecDataset - >>> dataset = MVTecDataset( + >>> from anomalib.data.datasets import MVTecADDataset + >>> dataset = MVTecADDataset( ... root=Path("./datasets/MVTec"), ... category="bottle", ... split="train" @@ -220,3 +220,21 @@ def make_mvtec_dataset( samples = samples[samples.split == split].reset_index(drop=True) return samples + + +class MVTecDataset(MVTecADDataset): + """MVTec dataset class (Deprecated). + + This class is deprecated and will be removed in a future version. + Please use MVTecADDataset instead. + """ + + def __init__(self, *args, **kwargs) -> None: + import warnings + + warnings.warn( + "MVTecDataset is deprecated and will be removed in a future version. 
Please use MVTecADDataset instead.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(*args, **kwargs) diff --git a/src/anomalib/data/datasets/image/mvtec_ad.py b/src/anomalib/data/datasets/image/mvtec_ad.py new file mode 100644 index 0000000000..b3d176f5b6 --- /dev/null +++ b/src/anomalib/data/datasets/image/mvtec_ad.py @@ -0,0 +1,240 @@ +"""MVTec AD Dataset. + +This module provides PyTorch Dataset implementation for the MVTec AD dataset. The +dataset will be downloaded and extracted automatically if not found locally. + +The dataset contains 15 categories of industrial objects with both normal and +anomalous samples. Each category includes RGB images and pixel-level ground truth +masks for anomaly segmentation. + +License: + MVTec AD dataset is released under the Creative Commons + Attribution-NonCommercial-ShareAlike 4.0 International License + (CC BY-NC-SA 4.0) https://creativecommons.org/licenses/by-nc-sa/4.0/ + +Reference: + Bergmann, P., Batzner, K., Fauser, M., Sattlegger, D., & Steger, C. (2021). + The MVTec Anomaly Detection Dataset: A Comprehensive Real-World Dataset for + Unsupervised Anomaly Detection. International Journal of Computer Vision, + 129(4), 1038-1059. + + Bergmann, P., Fauser, M., Sattlegger, D., & Steger, C. (2019). MVTec AD — + A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection. In + IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), + 9584-9592. 
+""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Sequence +from pathlib import Path + +from pandas import DataFrame +from torchvision.transforms.v2 import Transform + +from anomalib.data.datasets.base import AnomalibDataset +from anomalib.data.errors import MisMatchError +from anomalib.data.utils import LabelName, Split, validate_path + +IMG_EXTENSIONS = (".png", ".PNG") +CATEGORIES = ( + "bottle", + "cable", + "capsule", + "carpet", + "grid", + "hazelnut", + "leather", + "metal_nut", + "pill", + "screw", + "tile", + "toothbrush", + "transistor", + "wood", + "zipper", +) + + +class MVTecADDataset(AnomalibDataset): + """MVTec AD dataset class. + + Dataset class for loading and processing MVTec AD dataset images. Supports + both classification and segmentation tasks. + + Args: + root (Path | str): Path to root directory containing the dataset. + Defaults to ``"./datasets/MVTecAD"``. + category (str): Category name, must be one of ``CATEGORIES``. + Defaults to ``"bottle"``. + augmentations (Transform, optional): Augmentations that should be applied to the input images. + Defaults to ``None``. + split (str | Split | None, optional): Dataset split - usually + ``Split.TRAIN`` or ``Split.TEST``. Defaults to ``None``. + + Example: + >>> from pathlib import Path + >>> from anomalib.data.datasets import MVTecADDataset + >>> dataset = MVTecADDataset( + ... root=Path("./datasets/MVTecAD"), + ... category="bottle", + ... split="train" + ... 
) + + For classification tasks, each sample contains: + + >>> sample = dataset[0] + >>> list(sample.keys()) + ['image_path', 'label', 'image'] + + For segmentation tasks, samples also include mask paths and masks: + + >>> dataset.task = "segmentation" + >>> sample = dataset[0] + >>> list(sample.keys()) + ['image_path', 'label', 'image', 'mask_path', 'mask'] + + Images are PyTorch tensors with shape ``(C, H, W)``, masks have shape + ``(H, W)``: + + >>> sample["image"].shape, sample["mask"].shape + (torch.Size([3, 256, 256]), torch.Size([256, 256])) + """ + + def __init__( + self, + root: Path | str = "./datasets/MVTecAD", + category: str = "bottle", + augmentations: Transform | None = None, + split: str | Split | None = None, + ) -> None: + super().__init__(augmentations=augmentations) + + self.root_category = Path(root) / Path(category) + self.category = category + self.split = split + self.samples = make_mvtec_dataset( + self.root_category, + split=self.split, + extensions=IMG_EXTENSIONS, + ) + + +def make_mvtec_dataset( + root: str | Path, + split: str | Split | None = None, + extensions: Sequence[str] | None = None, +) -> DataFrame: + """Create MVTec AD samples by parsing the data directory structure. + + The files are expected to follow the structure: + ``path/to/dataset/split/category/image_filename.png`` + ``path/to/dataset/ground_truth/category/mask_filename.png`` + + Args: + root (Path | str): Path to dataset root directory + split (str | Split | None, optional): Dataset split (train or test) + Defaults to ``None``. + extensions (Sequence[str] | None, optional): Valid file extensions + Defaults to ``None``. 
+ + Returns: + DataFrame: Dataset samples with columns: + - path: Base path to dataset + - split: Dataset split (train/test) + - label: Class label + - image_path: Path to image file + - mask_path: Path to mask file (if available) + - label_index: Numeric label (0=normal, 1=abnormal) + + Example: + >>> root = Path("./datasets/MVTec/bottle") + >>> samples = make_mvtec_dataset(root, split="train") + >>> samples.head() + path split label image_path mask_path label_index + 0 datasets/MVTec/bottle train good [...]/good/105.png 0 + 1 datasets/MVTec/bottle train good [...]/good/017.png 0 + + Raises: + RuntimeError: If no valid images are found + MisMatchError: If anomalous images and masks don't match + """ + if extensions is None: + extensions = IMG_EXTENSIONS + + root = validate_path(root) + samples_list = [(str(root),) + f.parts[-3:] for f in root.glob(r"**/*") if f.suffix in extensions] + if not samples_list: + msg = f"Found 0 images in {root}" + raise RuntimeError(msg) + + samples = DataFrame(samples_list, columns=["path", "split", "label", "image_path"]) + + # Modify image_path column by converting to absolute path + samples["image_path"] = samples.path + "/" + samples.split + "/" + samples.label + "/" + samples.image_path + + # Create label index for normal (0) and anomalous (1) images. 
+ samples.loc[(samples.label == "good"), "label_index"] = LabelName.NORMAL + samples.loc[(samples.label != "good"), "label_index"] = LabelName.ABNORMAL + samples.label_index = samples.label_index.astype(int) + + # separate masks from samples + mask_samples = samples.loc[samples.split == "ground_truth"].sort_values( + by="image_path", + ignore_index=True, + ) + samples = samples[samples.split != "ground_truth"].sort_values( + by="image_path", + ignore_index=True, + ) + + # assign mask paths to anomalous test images + samples["mask_path"] = "" + samples.loc[ + (samples.split == "test") & (samples.label_index == LabelName.ABNORMAL), + "mask_path", + ] = mask_samples.image_path.to_numpy() + + # assert that the right mask files are associated with the right test images + abnormal_samples = samples.loc[samples.label_index == LabelName.ABNORMAL] + if ( + len(abnormal_samples) + and not abnormal_samples.apply( + lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, + axis=1, + ).all() + ): + msg = ( + "Mismatch between anomalous images and ground truth masks. Make sure " + "mask files in 'ground_truth' folder follow the same naming " + "convention as the anomalous images (e.g. image: '000.png', " + "mask: '000.png' or '000_mask.png')." + ) + raise MisMatchError(msg) + + # infer the task type + samples.attrs["task"] = "classification" if (samples["mask_path"] == "").all() else "segmentation" + + if split: + samples = samples[samples.split == split].reset_index(drop=True) + + return samples + + +class MVTecDataset(MVTecADDataset): + """MVTec dataset class (Deprecated). + + This class is deprecated and will be removed in a future version. + Please use MVTecADDataset instead. + """ + + def __init__(self, *args, **kwargs) -> None: + import warnings + + warnings.warn( + "MVTecDataset is deprecated and will be removed in a future version. 
Please use MVTecADDataset instead.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(*args, **kwargs) diff --git a/src/anomalib/data/datasets/image/visa.py b/src/anomalib/data/datasets/image/visa.py index d07942945d..af054324e4 100644 --- a/src/anomalib/data/datasets/image/visa.py +++ b/src/anomalib/data/datasets/image/visa.py @@ -28,7 +28,7 @@ from torchvision.transforms.v2 import Transform from anomalib.data.datasets import AnomalibDataset -from anomalib.data.datasets.image.mvtec import make_mvtec_dataset +from anomalib.data.datasets.image.mvtec_ad import make_mvtec_dataset from anomalib.data.utils import Split EXTENSIONS = (".png", ".jpg", ".JPG") diff --git a/src/anomalib/engine/engine.py b/src/anomalib/engine/engine.py index b97df79e9f..b6e03bb038 100644 --- a/src/anomalib/engine/engine.py +++ b/src/anomalib/engine/engine.py @@ -501,11 +501,11 @@ def test( Examples: # fit and test a one-class model - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Padim() >>> model.learning_type @@ -515,11 +515,11 @@ def test( >>> engine.test(model, datamodule=datamodule) # Test a zero-shot model - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim >>> from anomalib.engine import Engine - >>> datamodule = MVTec(image_size=240, normalization="clip") + >>> datamodule = MVTecAD(image_size=240, normalization="clip") >>> model = Padim() >>> model.learning_type diff --git a/src/anomalib/models/__init__.py b/src/anomalib/models/__init__.py index 8b36027b7d..78fc07245c 100644 --- a/src/anomalib/models/__init__.py +++ b/src/anomalib/models/__init__.py @@ -3,12 +3,12 @@ This module contains all the anomaly detection models available in anomalib. 
Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim >>> from anomalib.engine import Engine >>> # Initialize model and datamodule - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Padim() >>> # Train using the engine diff --git a/src/anomalib/models/image/__init__.py b/src/anomalib/models/image/__init__.py index b517d8a254..2717290f3a 100644 --- a/src/anomalib/models/image/__init__.py +++ b/src/anomalib/models/image/__init__.py @@ -5,11 +5,11 @@ Example: >>> from anomalib.models.image import Padim, Patchcore - >>> from anomalib.data import MVTec # doctest: +SKIP + >>> from anomalib.data import MVTecAD # doctest: +SKIP >>> from anomalib.engine import Engine # doctest: +SKIP >>> # Initialize model and data - >>> datamodule = MVTec() # doctest: +SKIP + >>> datamodule = MVTecAD() # doctest: +SKIP >>> model = Padim() # doctest: +SKIP >>> # Train using the Engine diff --git a/src/anomalib/models/image/cfa/README.md b/src/anomalib/models/image/cfa/README.md index 0066b801f0..cfe4346beb 100755 --- a/src/anomalib/models/image/cfa/README.md +++ b/src/anomalib/models/image/cfa/README.md @@ -16,7 +16,7 @@ Coupled-hypersphere-based Feature Adaptation (CFA) localizes anomalies using fea ## Usage -`anomalib train --model Cfa --data MVTec --data.category ` +`anomalib train --model Cfa --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/cfa/__init__.py b/src/anomalib/models/image/cfa/__init__.py index f96930f53d..3e4adc1c03 100644 --- a/src/anomalib/models/image/cfa/__init__.py +++ b/src/anomalib/models/image/cfa/__init__.py @@ -11,12 +11,12 @@ Paper: https://arxiv.org/abs/2206.04325 Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models.image import Cfa >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Cfa() >>> # 
Train using the Engine diff --git a/src/anomalib/models/image/cflow/README.md b/src/anomalib/models/image/cflow/README.md index 2ecf9353fc..6505b2e188 100644 --- a/src/anomalib/models/image/cflow/README.md +++ b/src/anomalib/models/image/cflow/README.md @@ -14,7 +14,7 @@ CFLOW model is based on a conditional normalizing flow framework adopted for ano ## Usage -`anomalib train --model Cflow --data MVTec --data.category ` +`anomalib train --model Cflow --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/csflow/README.md b/src/anomalib/models/image/csflow/README.md index 6b0dfc3cec..27c92669ea 100644 --- a/src/anomalib/models/image/csflow/README.md +++ b/src/anomalib/models/image/csflow/README.md @@ -31,7 +31,7 @@ The anomaly score for each local position $(i,j)$ of the feature map $y^s$ at sc ## Usage -`anomalib train --model Csflow --data MVTec --data.category ` +`anomalib train --model Csflow --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/dfkde/README.md b/src/anomalib/models/image/dfkde/README.md index 828863f429..2909f28e76 100644 --- a/src/anomalib/models/image/dfkde/README.md +++ b/src/anomalib/models/image/dfkde/README.md @@ -16,7 +16,7 @@ In the anomaly classification stage, the features are first reduced to the first ## Usage -`anomalib train --model Dfkde --data MVTec --data.category ` +`anomalib train --model Dfkde --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/dfm/README.md b/src/anomalib/models/image/dfm/README.md index 88dd423f7f..9681e067b1 100644 --- a/src/anomalib/models/image/dfm/README.md +++ b/src/anomalib/models/image/dfm/README.md @@ -18,7 +18,7 @@ In the anomaly classification stage, class-conditional PCA transformations and G ## Usage -`anomalib train --model Dfm --data MVTec --data.category ` +`anomalib train --model Dfm --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/draem/README.md 
b/src/anomalib/models/image/draem/README.md index 0fa650bcc0..15ac583689 100644 --- a/src/anomalib/models/image/draem/README.md +++ b/src/anomalib/models/image/draem/README.md @@ -16,7 +16,7 @@ For optimal results, DRAEM requires specifying the path to a folder of image dat ## Usage -`anomalib train --model Draem --data MVTec --data.category ` +`anomalib train --model Draem --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/dsr/README.md b/src/anomalib/models/image/dsr/README.md index 35278bfaf7..dcab9f7972 100644 --- a/src/anomalib/models/image/dsr/README.md +++ b/src/anomalib/models/image/dsr/README.md @@ -14,7 +14,7 @@ DSR is a quantized-feature based algorithm that consists of an autoencoder with ## Usage -`anomalib train --model Dsr --data MVTec --data.category ` +`anomalib train --model Dsr --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/efficient_ad/lightning_model.py b/src/anomalib/models/image/efficient_ad/lightning_model.py index 5a0c6c5ee3..d9f3434f98 100644 --- a/src/anomalib/models/image/efficient_ad/lightning_model.py +++ b/src/anomalib/models/image/efficient_ad/lightning_model.py @@ -12,11 +12,11 @@ - Anomaly detection via feature comparison Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import EfficientAd >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = EfficientAd() >>> engine = Engine() diff --git a/src/anomalib/models/image/fastflow/README.md b/src/anomalib/models/image/fastflow/README.md index 0ce47d9a44..0422c558c6 100644 --- a/src/anomalib/models/image/fastflow/README.md +++ b/src/anomalib/models/image/fastflow/README.md @@ -14,7 +14,7 @@ FastFlow is a two-dimensional normalizing flow-based probability distribution es ## Usage -`anomalib train --model Fastflow --data MVTec --data.category ` +`anomalib train --model Fastflow --data MVTecAD 
--data.category ` ## Benchmark diff --git a/src/anomalib/models/image/fastflow/__init__.py b/src/anomalib/models/image/fastflow/__init__.py index 13c7f19483..5638f849a3 100644 --- a/src/anomalib/models/image/fastflow/__init__.py +++ b/src/anomalib/models/image/fastflow/__init__.py @@ -5,11 +5,11 @@ The model achieves competitive performance while maintaining fast inference times. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Fastflow >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Fastflow() >>> engine = Engine() diff --git a/src/anomalib/models/image/fastflow/lightning_model.py b/src/anomalib/models/image/fastflow/lightning_model.py index 85a696ecfd..c48a5bb106 100644 --- a/src/anomalib/models/image/fastflow/lightning_model.py +++ b/src/anomalib/models/image/fastflow/lightning_model.py @@ -9,11 +9,11 @@ can be efficiently modeled. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Fastflow >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Fastflow() >>> engine = Engine() diff --git a/src/anomalib/models/image/fre/__init__.py b/src/anomalib/models/image/fre/__init__.py index 91646c778f..532bcb31a0 100755 --- a/src/anomalib/models/image/fre/__init__.py +++ b/src/anomalib/models/image/fre/__init__.py @@ -6,11 +6,11 @@ reconstruction error. 
Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Fre >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Fre() >>> engine = Engine() diff --git a/src/anomalib/models/image/fre/lightning_model.py b/src/anomalib/models/image/fre/lightning_model.py index 6021f6655a..ef5f02dba1 100755 --- a/src/anomalib/models/image/fre/lightning_model.py +++ b/src/anomalib/models/image/fre/lightning_model.py @@ -6,11 +6,11 @@ the reconstruction error between the original and reconstructed features. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Fre >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Fre() >>> engine = Engine() diff --git a/src/anomalib/models/image/ganomaly/README.md b/src/anomalib/models/image/ganomaly/README.md index df3a3f2acd..f2dc0ee06a 100644 --- a/src/anomalib/models/image/ganomaly/README.md +++ b/src/anomalib/models/image/ganomaly/README.md @@ -16,7 +16,7 @@ The key idea here is that, during inference, when an anomalous image is passed t ## Usage -`anomalib train --model Ganomaly --data MVTec --data.category ` +`anomalib train --model Ganomaly --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/ganomaly/__init__.py b/src/anomalib/models/image/ganomaly/__init__.py index 76c6a5332a..8fb3c72a42 100644 --- a/src/anomalib/models/image/ganomaly/__init__.py +++ b/src/anomalib/models/image/ganomaly/__init__.py @@ -6,11 +6,11 @@ reconstructions are realistic. 
Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Ganomaly >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Ganomaly() >>> engine = Engine() diff --git a/src/anomalib/models/image/ganomaly/lightning_model.py b/src/anomalib/models/image/ganomaly/lightning_model.py index a18bfad965..5030896918 100644 --- a/src/anomalib/models/image/ganomaly/lightning_model.py +++ b/src/anomalib/models/image/ganomaly/lightning_model.py @@ -6,11 +6,11 @@ reconstructions are realistic. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Ganomaly >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Ganomaly() >>> engine = Engine() diff --git a/src/anomalib/models/image/padim/README.md b/src/anomalib/models/image/padim/README.md index 09043e09c5..801962b734 100644 --- a/src/anomalib/models/image/padim/README.md +++ b/src/anomalib/models/image/padim/README.md @@ -16,7 +16,7 @@ During inference, Mahalanobis distance is used to score each patch position of t ## Usage -`anomalib train --model Padim --data MVTec --data.category ` +`anomalib train --model Padim --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/padim/lightning_model.py b/src/anomalib/models/image/padim/lightning_model.py index b9c6c441ca..eac251fc11 100644 --- a/src/anomalib/models/image/padim/lightning_model.py +++ b/src/anomalib/models/image/padim/lightning_model.py @@ -12,12 +12,12 @@ Paper: https://arxiv.org/abs/2011.08785 Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models.image.padim import Padim >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Padim( ... backbone="resnet18", ... 
layers=["layer1", "layer2", "layer3"], @@ -85,11 +85,11 @@ class Padim(MemoryBankMixin, AnomalibModule): Example: >>> from anomalib.models import Padim - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Padim( ... backbone="resnet18", ... layers=["layer1", "layer2", "layer3"], diff --git a/src/anomalib/models/image/patchcore/README.md b/src/anomalib/models/image/patchcore/README.md index 03a4c54d90..764f58563c 100644 --- a/src/anomalib/models/image/patchcore/README.md +++ b/src/anomalib/models/image/patchcore/README.md @@ -16,7 +16,7 @@ During inference this memory bank is coreset subsampled. Coreset subsampling gen ## Usage -`anomalib train --model Patchcore --data MVTec --data.category ` +`anomalib train --model Patchcore --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/patchcore/__init__.py b/src/anomalib/models/image/patchcore/__init__.py index d3b0e36832..73a57ac196 100644 --- a/src/anomalib/models/image/patchcore/__init__.py +++ b/src/anomalib/models/image/patchcore/__init__.py @@ -10,12 +10,12 @@ high performance while maintaining interpretability through localization maps. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Patchcore >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Patchcore( ... backbone="wide_resnet50_2", ... 
layers=["layer2", "layer3"], diff --git a/src/anomalib/models/image/patchcore/lightning_model.py b/src/anomalib/models/image/patchcore/lightning_model.py index 60ce92397b..fe742a9c82 100644 --- a/src/anomalib/models/image/patchcore/lightning_model.py +++ b/src/anomalib/models/image/patchcore/lightning_model.py @@ -10,12 +10,12 @@ performance while maintaining interpretability through localization maps. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Patchcore >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Patchcore( ... backbone="wide_resnet50_2", ... layers=["layer2", "layer3"], @@ -96,12 +96,12 @@ class Patchcore(MemoryBankMixin, AnomalibModule): Defaults to ``True``. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Patchcore >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Patchcore( ... backbone="wide_resnet50_2", ... 
layers=["layer2", "layer3"], diff --git a/src/anomalib/models/image/reverse_distillation/README.md b/src/anomalib/models/image/reverse_distillation/README.md index 61a6e8c672..67ae59e310 100644 --- a/src/anomalib/models/image/reverse_distillation/README.md +++ b/src/anomalib/models/image/reverse_distillation/README.md @@ -16,7 +16,7 @@ During testing, a similar step is followed but this time the cosine distance bet ## Usage -`anomalib train --model ReverseDistillation --data MVTec --data.category ` +`anomalib train --model ReverseDistillation --data MVTecAD --data.category ` ## Benchmark diff --git a/src/anomalib/models/image/reverse_distillation/__init__.py b/src/anomalib/models/image/reverse_distillation/__init__.py index aba85506b3..c5fe345fd6 100644 --- a/src/anomalib/models/image/reverse_distillation/__init__.py +++ b/src/anomalib/models/image/reverse_distillation/__init__.py @@ -12,11 +12,11 @@ Example: >>> from anomalib.models import ReverseDistillation - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = ReverseDistillation() >>> # Train using the Engine diff --git a/src/anomalib/models/image/reverse_distillation/lightning_model.py b/src/anomalib/models/image/reverse_distillation/lightning_model.py index 8a72b86cc7..71817878a7 100644 --- a/src/anomalib/models/image/reverse_distillation/lightning_model.py +++ b/src/anomalib/models/image/reverse_distillation/lightning_model.py @@ -11,11 +11,11 @@ Example: >>> from anomalib.models import ReverseDistillation - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = ReverseDistillation( ... backbone="wide_resnet50_2", ... 
layers=["layer1", "layer2", "layer3"] diff --git a/src/anomalib/models/image/stfpm/README.md b/src/anomalib/models/image/stfpm/README.md index 0b14d343ef..69a739da07 100644 --- a/src/anomalib/models/image/stfpm/README.md +++ b/src/anomalib/models/image/stfpm/README.md @@ -16,13 +16,13 @@ During inference, the feature pyramids of teacher and student networks are compa ## Usage -`anomalib train --model Stfpm --data MVTec --data.category ` +`anomalib train --model Stfpm --data MVTecAD --data.category ` ## Benchmark All results gathered with seed `42`. -## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad) +## [MVTec AD Dataset](https://www.mvtec.com/company/research/datasets/mvtec-ad) ### Image-Level AUC diff --git a/src/anomalib/models/image/stfpm/__init__.py b/src/anomalib/models/image/stfpm/__init__.py index dfcc6bbff6..48f89c34f5 100644 --- a/src/anomalib/models/image/stfpm/__init__.py +++ b/src/anomalib/models/image/stfpm/__init__.py @@ -13,9 +13,9 @@ Example: >>> from anomalib.models.image import Stfpm >>> from anomalib.engine import Engine - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Stfpm() >>> engine = Engine(model=model, datamodule=datamodule) diff --git a/src/anomalib/models/image/stfpm/lightning_model.py b/src/anomalib/models/image/stfpm/lightning_model.py index 4f5e977254..713fd24bef 100644 --- a/src/anomalib/models/image/stfpm/lightning_model.py +++ b/src/anomalib/models/image/stfpm/lightning_model.py @@ -12,8 +12,8 @@ Example: >>> from anomalib.models.image import Stfpm >>> from anomalib.engine import Engine - >>> from anomalib.data import MVTec - >>> datamodule = MVTec() + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD() >>> model = Stfpm( ... backbone="resnet18", ... 
layers=["layer1", "layer2", "layer3"] @@ -78,9 +78,9 @@ class Stfpm(AnomalibModule): Example: >>> from anomalib.models.image import Stfpm - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Stfpm( ... backbone="resnet18", ... layers=["layer1", "layer2", "layer3"] diff --git a/src/anomalib/models/image/supersimplenet/README.md b/src/anomalib/models/image/supersimplenet/README.md index ed9092fbdc..b33665f534 100644 --- a/src/anomalib/models/image/supersimplenet/README.md +++ b/src/anomalib/models/image/supersimplenet/README.md @@ -26,17 +26,17 @@ This implementation supports both unsupervised and supervised setting, but Anoma ## Usage -`anomalib train --model SuperSimpleNet --data MVTec --data.category ` +`anomalib train --model SuperSimpleNet --data MVTecAD --data.category ` > It is recommended to train the model for 300 epochs with batch size of 32 to achieve stable training with random anomaly generation. Training with lower parameter values will still work, but might not yield the optimal results. > > For supervised learning, refer to the [official code](https://github.com/blaz-r/SuperSimpleNet). -## MVTec AD results +## MVTec AD results The following results were obtained using this Anomalib implementation trained for 300 epochs with seed 0, default params, and batch size 32. 
| | **Image AUROC** | **Pixel AUPRO** | -| ----------- | :-------------: | :-------------: | +| ---------- | :-------------: | :-------------: | | Bottle | 1.000 | 0.903 | | Cable | 0.981 | 0.901 | | Capsule | 0.989 | 0.931 | diff --git a/src/anomalib/models/image/supersimplenet/lightning_model.py b/src/anomalib/models/image/supersimplenet/lightning_model.py index a6a0d78670..f7dd5cac4c 100644 --- a/src/anomalib/models/image/supersimplenet/lightning_model.py +++ b/src/anomalib/models/image/supersimplenet/lightning_model.py @@ -9,11 +9,11 @@ It delivers strong performance while maintaining fast inference. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Supersimplenet >>> from anomalib.engine import Engine - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Supersimplenet() >>> engine = Engine() diff --git a/src/anomalib/models/image/uflow/README.md b/src/anomalib/models/image/uflow/README.md index 45ac85a7fd..b51ad9963f 100644 --- a/src/anomalib/models/image/uflow/README.md +++ b/src/anomalib/models/image/uflow/README.md @@ -52,7 +52,7 @@ In order to obtain the same exact results, although the architecture parameters ## Usage -`anomalib train --model Uflow --data MVTec --data.category ` +`anomalib train --model Uflow --data MVTecAD --data.category ` ## Download data @@ -109,9 +109,9 @@ Normalizing Flow outputs ### Anomalies -#### MVTec +#### MVTecAD -![MVTec results - anomalies](/docs/source/images/uflow/results-mvtec-anomalies.jpg "MVTec results - anomalies") +![MVTecAD results - anomalies](/docs/source/images/uflow/results-mvtec-anomalies.jpg "MVTecAD results - anomalies") #### BeanTech, LGG MRI, STC @@ -119,9 +119,9 @@ Normalizing Flow outputs ### Normal images -#### MVTec +#### MVTecAD -![MVTec results - normal](/docs/source/images/uflow/results-mvtec-good.jpg "MVTec results - normal") +![MVTecAD results - normal](/docs/source/images/uflow/results-mvtec-good.jpg "MVTecAD 
results - normal") #### BeanTech, LGG MRI, STC diff --git a/src/anomalib/models/image/uflow/__init__.py b/src/anomalib/models/image/uflow/__init__.py index 71693e3b69..3d665dd68b 100644 --- a/src/anomalib/models/image/uflow/__init__.py +++ b/src/anomalib/models/image/uflow/__init__.py @@ -12,9 +12,9 @@ Example: >>> from anomalib.models.image import Uflow >>> from anomalib.engine import Engine - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = Uflow() >>> engine = Engine(model=model, datamodule=datamodule) diff --git a/src/anomalib/models/image/uflow/lightning_model.py b/src/anomalib/models/image/uflow/lightning_model.py index 02715837e9..ee0b2a0acc 100644 --- a/src/anomalib/models/image/uflow/lightning_model.py +++ b/src/anomalib/models/image/uflow/lightning_model.py @@ -12,8 +12,8 @@ Example: >>> from anomalib.models.image import Uflow >>> from anomalib.engine import Engine - >>> from anomalib.data import MVTec - >>> datamodule = MVTec() + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD() >>> model = Uflow() >>> engine = Engine(model=model, datamodule=datamodule) >>> engine.fit() # doctest: +SKIP @@ -89,8 +89,8 @@ class Uflow(AnomalibModule): Example: >>> from anomalib.models.image import Uflow >>> from anomalib.engine import Engine - >>> from anomalib.data import MVTec - >>> datamodule = MVTec() + >>> from anomalib.data import MVTecAD + >>> datamodule = MVTecAD() >>> model = Uflow(backbone="resnet18") >>> engine = Engine(model=model, datamodule=datamodule) >>> engine.fit() # doctest: +SKIP diff --git a/src/anomalib/models/image/vlm_ad/__init__.py b/src/anomalib/models/image/vlm_ad/__init__.py index 271ab257a4..75493ef1c0 100644 --- a/src/anomalib/models/image/vlm_ad/__init__.py +++ b/src/anomalib/models/image/vlm_ad/__init__.py @@ -6,11 +6,11 @@ Example: >>> from anomalib.models.image import VlmAd - >>> from anomalib.data import MVTec + >>> from 
anomalib.data import MVTecAD >>> from anomalib.engine import Engine >>> # Initialize model and data - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> model = VlmAd( ... backend="chatgpt", ... model_name="gpt-4-vision-preview" diff --git a/src/anomalib/models/image/vlm_ad/lightning_model.py b/src/anomalib/models/image/vlm_ad/lightning_model.py index 57e3a76be4..6572cfeb13 100644 --- a/src/anomalib/models/image/vlm_ad/lightning_model.py +++ b/src/anomalib/models/image/vlm_ad/lightning_model.py @@ -11,7 +11,7 @@ Example: >>> from anomalib.models.image import VlmAd - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.engine import Engine >>> model = VlmAd( # doctest: +SKIP @@ -19,7 +19,7 @@ ... api_key="YOUR_API_KEY", ... k_shot=3 ... ) - >>> datamodule = MVTec() + >>> datamodule = MVTecAD() >>> engine = Engine() >>> predictions = engine.predict(model=model, datamodule=datamodule) # doctest: +SKIP diff --git a/src/anomalib/models/image/winclip/README.md b/src/anomalib/models/image/winclip/README.md index 24a1fa8e4e..be60e7ffc4 100644 --- a/src/anomalib/models/image/winclip/README.md +++ b/src/anomalib/models/image/winclip/README.md @@ -22,11 +22,11 @@ WinCLIP is a zero-shot model, which means that we can directly evaluate the mode ### 0-Shot -`anomalib test --model WinClip --data MVTec` +`anomalib test --model WinClip --data MVTecAD` ### 1-Shot -`anomalib test --model WinClip --model.k_shot 1 --data MVTec` +`anomalib test --model WinClip --model.k_shot 1 --data MVTecAD` ## Parameters @@ -46,30 +46,30 @@ Coming soon... 
- +| | Avg | Carpet | Grid | Leather | Tile | Wood | Bottle | Cable | Capsule | Hazelnut | Metal Nut | Pill | Screw | Toothbrush | Transistor | Zipper | | +| ------ | :-: | :----: | :--: | :-----: | :--: | :--: | :----: | :---: | :-----: | :------: | :-------: | :--: | :---: | :--------: | :--------: | :----: | --- | +| 0-shot | | | | | | | | | | | | | | | | | | +| 1-shot | | | | | | | | | | | | | | | | | | +| 2-shot | | | | | | | | | | | | | | | | | | +| 4-shot | | | | | | | | | | | | | | | | | | diff --git a/src/anomalib/models/image/winclip/lightning_model.py b/src/anomalib/models/image/winclip/lightning_model.py index ebc8e46853..9e53e0e934 100644 --- a/src/anomalib/models/image/winclip/lightning_model.py +++ b/src/anomalib/models/image/winclip/lightning_model.py @@ -7,11 +7,11 @@ comparing image regions with normal reference examples through CLIP embeddings. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.engine import Engine >>> from anomalib.models.image import WinClip - >>> datamodule = MVTec(root="./datasets/MVTec") # doctest: +SKIP + >>> datamodule = MVTecAD(root="./datasets/MVTecAD") # doctest: +SKIP >>> model = WinClip() # doctest: +SKIP >>> Engine.test(model=model, datamodule=datamodule) # doctest: +SKIP diff --git a/src/anomalib/pipelines/benchmark/__init__.py b/src/anomalib/pipelines/benchmark/__init__.py index 759ba32276..2c83d81f13 100644 --- a/src/anomalib/pipelines/benchmark/__init__.py +++ b/src/anomalib/pipelines/benchmark/__init__.py @@ -6,13 +6,13 @@ Example: >>> from anomalib.pipelines import Benchmark - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim, Patchcore >>> # Initialize benchmark with models and datasets >>> benchmark = Benchmark( ... models=[Padim(), Patchcore()], - ... datasets=[MVTec(category="bottle"), MVTec(category="cable")] + ... datasets=[MVTecAD(category="bottle"), MVTecAD(category="cable")] ... 
) >>> # Run benchmark diff --git a/src/anomalib/pipelines/benchmark/job.py b/src/anomalib/pipelines/benchmark/job.py index dccacf77e7..664a4cc581 100644 --- a/src/anomalib/pipelines/benchmark/job.py +++ b/src/anomalib/pipelines/benchmark/job.py @@ -5,13 +5,13 @@ and collects performance metrics. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim >>> from anomalib.pipelines.benchmark.job import BenchmarkJob >>> # Initialize model, datamodule and job >>> model = Padim() - >>> datamodule = MVTec(category="bottle") + >>> datamodule = MVTecAD(category="bottle") >>> job = BenchmarkJob( ... accelerator="gpu", ... model=model, @@ -68,13 +68,13 @@ class BenchmarkJob(Job): flat_cfg (dict): Flattened configuration dictionary with dotted keys. Example: - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim >>> from anomalib.pipelines.benchmark.job import BenchmarkJob >>> # Initialize model, datamodule and job >>> model = Padim() - >>> datamodule = MVTec(category="bottle") + >>> datamodule = MVTecAD(category="bottle") >>> job = BenchmarkJob( ... accelerator="gpu", ... model=model, diff --git a/src/anomalib/pipelines/benchmark/pipeline.py b/src/anomalib/pipelines/benchmark/pipeline.py index 9e31c4e043..9319111b3c 100644 --- a/src/anomalib/pipelines/benchmark/pipeline.py +++ b/src/anomalib/pipelines/benchmark/pipeline.py @@ -6,13 +6,13 @@ Example: >>> from anomalib.pipelines import Benchmark - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim, Patchcore >>> # Initialize benchmark with models and datasets >>> benchmark = Benchmark( ... models=[Padim(), Patchcore()], - ... datasets=[MVTec(category="bottle"), MVTec(category="cable")] + ... datasets=[MVTecAD(category="bottle"), MVTecAD(category="cable")] ... 
) >>> # Run benchmark @@ -43,13 +43,13 @@ class Benchmark(Pipeline): Example: >>> from anomalib.pipelines import Benchmark - >>> from anomalib.data import MVTec + >>> from anomalib.data import MVTecAD >>> from anomalib.models import Padim, Patchcore >>> # Initialize benchmark with models and datasets >>> benchmark = Benchmark( ... models=[Padim(), Patchcore()], - ... datasets=[MVTec(category="bottle"), MVTec(category="cable")] + ... datasets=[MVTecAD(category="bottle"), MVTecAD(category="cable")] ... ) >>> # Run benchmark diff --git a/src/anomalib/utils/path.py b/src/anomalib/utils/path.py index 7c8643e417..41696832d9 100644 --- a/src/anomalib/utils/path.py +++ b/src/anomalib/utils/path.py @@ -281,9 +281,9 @@ def generate_output_filename( Examples: Basic usage with category: - >>> input_path = "/data/MVTec/bottle/test/broken_large/000.png" + >>> input_path = "/data/MVTecAD/bottle/test/broken_large/000.png" >>> output_base = "/results" - >>> dataset = "MVTec" + >>> dataset = "MVTecAD" >>> generate_output_filename(input_path, output_base, dataset, "bottle") PosixPath('/results/test/broken_large/000.png') diff --git a/tests/conftest.py b/tests/conftest.py index b2cfe0606d..cb90277ef8 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -10,7 +10,7 @@ import pytest -from anomalib.data import ImageDataFormat, MVTec, VideoDataFormat +from anomalib.data import ImageDataFormat, MVTecAD, VideoDataFormat from anomalib.engine import Engine from anomalib.models import get_model from tests.helpers.data import DummyImageDatasetGenerator, DummyVideoDatasetGenerator @@ -87,7 +87,7 @@ def checkpoint(model_name: str) -> Path: Since integration tests train all the models, model training occurs when running unit tests invididually. 
""" model = get_model(model_name) - _ckpt_path = project_path / model.name / "MVTec" / "dummy" / "latest" / "weights" / "lightning" / "model.ckpt" + _ckpt_path = project_path / model.name / "MVTecAD" / "dummy" / "latest" / "weights" / "lightning" / "model.ckpt" if not _ckpt_path.exists(): engine = Engine( logger=False, @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTec(root=dataset_path / "mvtec", category="dummy") + dataset = MVTecAD(root=dataset_path / "mvtec", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 541a787aea..613212f2c5 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -276,7 +276,7 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): seed (int, optional): Fixes seed if any number greater than 0 is provided. 0 means no seed. Defaults to 0. Examples: - To create an MVTec dataset with 10 training images and 10 testing images per category, use the following code. + To create an MVTecAD dataset with 10 training images and 10 testing images per category, use the following code. 
>>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec", num_train=10, num_test=10) >>> dataset_generator.generate_dataset() diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index 1385d756c7..1bf765df8d 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -45,7 +45,7 @@ def test_test(self, dataset_path: Path, project_path: Path) -> None: "test", *self._get_common_cli_args(dataset_path, project_path), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) torch.cuda.empty_cache() @@ -62,7 +62,7 @@ def test_train(self, dataset_path: Path, project_path: Path) -> None: "train", *self._get_common_cli_args(dataset_path, project_path), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) torch.cuda.empty_cache() @@ -79,7 +79,7 @@ def test_validate(self, dataset_path: Path, project_path: Path) -> None: "validate", *self._get_common_cli_args(dataset_path, project_path), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) torch.cuda.empty_cache() @@ -87,13 +87,13 @@ def test_validate(self, dataset_path: Path, project_path: Path) -> None: def test_predict_with_dataloader(self, dataset_path: Path, project_path: Path) -> None: """Test the predict method of the CLI. - This test uses the MVTec dataloader for predict test. + This test uses the MVTecAD dataloader for predict test. Args: dataset_path (Path): Root of the synthetic/original dataset. project_path (Path): Path to temporary project folder. 
""" - # Test with MVTec Dataset + # Test with MVTecAD Dataset AnomalibCLI( args=[ "predict", @@ -102,7 +102,7 @@ def test_predict_with_dataloader(self, dataset_path: Path, project_path: Path) - project_path, ), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) torch.cuda.empty_cache() @@ -126,7 +126,7 @@ def test_predict_with_image_folder(self, project_path: Path) -> None: project_path, ), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) torch.cuda.empty_cache() @@ -150,7 +150,7 @@ def test_predict_with_image_path(self, project_path: Path) -> None: project_path, ), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) torch.cuda.empty_cache() @@ -175,7 +175,7 @@ def test_export( export_type, *self._get_common_cli_args(None, project_path), "--ckpt_path", - f"{project_path}/Padim/MVTec/dummy/v0/weights/lightning/model.ckpt", + f"{project_path}/Padim/MVTecAD/dummy/v0/weights/lightning/model.ckpt", ], ) @@ -188,10 +188,11 @@ def _get_common_cli_args(dataset_path: Path | None, project_path: Path) -> list[ project_path (Path): Path to the project folder. model_name (str): Name of the model. Defaults to None. """ - # We need to set the predict dataloader as MVTec and UCSDped do have have predict_dataloader attribute defined. + # We need to set the predict dataloader as MVTecAD and UCSDped do not + # have predict_dataloader attribute defined. 
if dataset_path: data_root = f"{dataset_path}/mvtec" - dataclass = "MVTec" + dataclass = "MVTecAD" data_args = [ "--data", dataclass, diff --git a/tests/integration/model/test_models.py b/tests/integration/model/test_models.py index 39de61297a..945ac65da1 100644 --- a/tests/integration/model/test_models.py +++ b/tests/integration/model/test_models.py @@ -14,7 +14,7 @@ import pytest -from anomalib.data import AnomalibDataModule, MVTec +from anomalib.data import AnomalibDataModule, MVTecAD from anomalib.deploy import ExportType from anomalib.engine import Engine from anomalib.models import AnomalibModule, get_available_models, get_model @@ -201,7 +201,7 @@ def _get_objects( else: # EfficientAd requires that the batch size be lesser than the number of images in the dataset. # This is so that the LR step size is not 0. - dataset = MVTec( + dataset = MVTecAD( root=dataset_path / "mvtec", category="dummy", # EfficientAd requires train batch size 1 diff --git a/tests/integration/pipelines/pipeline.yaml b/tests/integration/pipelines/pipeline.yaml index 114b125944..e80b920c05 100644 --- a/tests/integration/pipelines/pipeline.yaml +++ b/tests/integration/pipelines/pipeline.yaml @@ -8,7 +8,7 @@ benchmark: class_path: grid: [Padim, Patchcore] data: - class_path: MVTec + class_path: MVTecAD init_args: category: grid: diff --git a/tests/integration/tools/upgrade/expected_draem_v1.yaml b/tests/integration/tools/upgrade/expected_draem_v1.yaml index 0e65e8f49b..645c77e57e 100644 --- a/tests/integration/tools/upgrade/expected_draem_v1.yaml +++ b/tests/integration/tools/upgrade/expected_draem_v1.yaml @@ -1,7 +1,7 @@ data: - class_path: anomalib.data.MVTec + class_path: anomalib.data.MVTecAD init_args: - root: ./datasets/MVTec + root: ./datasets/MVTecAD category: bottle train_batch_size: 72 eval_batch_size: 32 diff --git a/tests/integration/tools/upgrade/original_draem_v0.yaml b/tests/integration/tools/upgrade/original_draem_v0.yaml index 0f98ca7d99..91cf34e063 100644 --- 
a/tests/integration/tools/upgrade/original_draem_v0.yaml +++ b/tests/integration/tools/upgrade/original_draem_v0.yaml @@ -1,7 +1,7 @@ dataset: name: mvtec format: mvtec - path: ./datasets/MVTec + path: ./datasets/MVTecAD category: bottle task: segmentation train_batch_size: 72 diff --git a/tests/unit/data/datamodule/image/test_mvtec.py b/tests/unit/data/datamodule/image/test_mvtec.py index b0ff74d86c..c9bee1ac3b 100644 --- a/tests/unit/data/datamodule/image/test_mvtec.py +++ b/tests/unit/data/datamodule/image/test_mvtec.py @@ -1,4 +1,4 @@ -"""Unit Tests - MVTec Datamodule.""" +"""Unit Tests - MVTecAD Datamodule.""" # Copyright (C) 2023-2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 @@ -8,18 +8,18 @@ import pytest from torchvision.transforms.v2 import Resize -from anomalib.data import MVTec +from anomalib.data import MVTecAD from tests.unit.data.datamodule.base.image import _TestAnomalibImageDatamodule -class TestMVTec(_TestAnomalibImageDatamodule): +class TestMVTecAD(_TestAnomalibImageDatamodule): """MVTec Datamodule Unit Tests.""" @pytest.fixture() @staticmethod - def datamodule(dataset_path: Path) -> MVTec: + def datamodule(dataset_path: Path) -> MVTecAD: """Create and return a MVTec datamodule.""" - _datamodule = MVTec( + _datamodule = MVTecAD( root=dataset_path / "mvtec", category="dummy", train_batch_size=4, diff --git a/tests/unit/engine/test_engine.py b/tests/unit/engine/test_engine.py index 947fe3f843..98aa830ec3 100644 --- a/tests/unit/engine/test_engine.py +++ b/tests/unit/engine/test_engine.py @@ -8,7 +8,7 @@ import pytest import yaml -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.models import Padim @@ -107,7 +107,7 @@ def test_from_config(fxt_full_config_path: Path) -> None: assert model is not None assert isinstance(model, Padim) assert datamodule is not None - assert isinstance(datamodule, MVTec) + assert isinstance(datamodule, MVTecAD) assert 
datamodule.train_batch_size == 32 assert datamodule.num_workers == 8 diff --git a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py index 2bb04bbffe..88f61724fb 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py +++ b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py @@ -6,7 +6,7 @@ import tempfile from pathlib import Path -from anomalib.data import MVTec +from anomalib.data import MVTecAD from anomalib.engine import Engine from anomalib.loggers import AnomalibTensorBoardLogger @@ -24,7 +24,7 @@ def test_add_images(dataset_path: Path) -> None: limit_test_batches=1, accelerator="cpu", ) - engine.test(model=model, datamodule=MVTec(root=dataset_path / "mvtec", category="dummy")) + engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtec", category="dummy")) # test if images are logged assert len(list(Path(dir_loc).glob("**/*.png"))) >= 1, "Failed to save to local path" diff --git a/tests/unit/utils/test_visualizer.py b/tests/unit/utils/test_visualizer.py index 95a1947c03..26b65585b6 100644 --- a/tests/unit/utils/test_visualizer.py +++ b/tests/unit/utils/test_visualizer.py @@ -10,7 +10,7 @@ from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas from torch.utils.data import DataLoader -from anomalib.data import ImageBatch, MVTec, PredictDataset +from anomalib.data import ImageBatch, MVTecAD, PredictDataset from anomalib.engine import Engine from anomalib.models import Padim from anomalib.utils.visualization.image import _ImageGrid @@ -50,7 +50,7 @@ def test_model_visualizer_mode( fast_dev_run=True, devices=1, ) - datamodule = MVTec(root=dataset_path / "mvtec", category="dummy") + datamodule = MVTecAD(root=dataset_path / "mvtec", category="dummy") engine.test(model=model, datamodule=datamodule, ckpt_path=str(_ckpt_path)) dataset = PredictDataset(path=dataset_path / "mvtec" / "dummy" / "test") diff --git 
a/tools/upgrade/config.py b/tools/upgrade/config.py index bd97cc0834..eca48a5c3d 100644 --- a/tools/upgrade/config.py +++ b/tools/upgrade/config.py @@ -126,8 +126,8 @@ def upgrade_data_config(self) -> dict[str, Any]: # Get the dataset class name based on the format in the old config dataset_class_name = convert_snake_to_pascal_case(self.old_config["dataset"]["format"]) - # mvtec has an exception and is written as MVTec. Convert all Mvtec datasets to MVTec - dataset_class_name = dataset_class_name.replace("Mvtec", "MVTec") + # mvtec has an exception and is written as MVTecAD. Convert all Mvtec datasets to MVTecAD + dataset_class_name = dataset_class_name.replace("Mvtec", "MVTecAD") # Get the class path and init args. class_path = f"anomalib.data.{dataset_class_name}" From ffb2f928d65692b58c01555120267a839918cc2d Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Mon, 10 Feb 2025 15:55:51 +0000 Subject: [PATCH 02/19] Update the test dataset paths Signed-off-by: Samet Akcay --- src/anomalib/data/datamodules/image/__init__.py | 1 - src/anomalib/utils/config.py | 2 +- tests/conftest.py | 2 +- tests/helpers/data.py | 14 +++++++------- tests/integration/model/test_models.py | 2 +- tests/unit/data/datamodule/image/test_folder.py | 2 +- .../image/{test_mvtec.py => test_mvtec_ad.py} | 2 +- tests/unit/data/utils/test_synthetic.py | 2 +- .../visualizer_callback/dummy_lightning_model.py | 2 +- .../visualizer_callback/test_visualizer.py | 2 +- tests/unit/utils/test_visualizer.py | 4 ++-- 11 files changed, 17 insertions(+), 18 deletions(-) rename tests/unit/data/datamodule/image/{test_mvtec.py => test_mvtec_ad.py} (95%) diff --git a/src/anomalib/data/datamodules/image/__init__.py b/src/anomalib/data/datamodules/image/__init__.py index 66e87c7852..629fac9f9e 100644 --- a/src/anomalib/data/datamodules/image/__init__.py +++ b/src/anomalib/data/datamodules/image/__init__.py @@ -54,7 +54,6 @@ class ImageDataFormat(str, Enum): FOLDER_3D = "folder_3d" KOLEKTOR = "kolektor" MVTEC_AD = 
"mvtec_ad" - MVTEC = "mvtec" # Keep the value same for backward compatibility MVTEC_3D = "mvtec_3d" VISA = "visa" diff --git a/src/anomalib/utils/config.py b/src/anomalib/utils/config.py index 5c82790e0d..5a867c55bc 100644 --- a/src/anomalib/utils/config.py +++ b/src/anomalib/utils/config.py @@ -128,7 +128,7 @@ def to_yaml(config: Namespace | ListConfig | DictConfig) -> str: Examples: >>> from omegaconf import DictConfig - >>> config = DictConfig({"model": "padim", "dataset": {"name": "mvtec"}}) + >>> config = DictConfig({"model": "padim", "dataset": {"name": "mvtec_ad"}}) >>> yaml_str = to_yaml(config) >>> print(yaml_str) model: padim diff --git a/tests/conftest.py b/tests/conftest.py index cb90277ef8..560a6b9ef9 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTecAD(root=dataset_path / "mvtec", category="dummy") + dataset = MVTecAD(root=dataset_path / "mvtec_ad", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 613212f2c5..718aad1785 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -277,15 +277,15 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): Examples: To create an MVTecAD dataset with 10 training images and 10 testing images per category, use the following code. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec", num_train=10, num_test=10) + >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec_ad", num_train=10, num_test=10) >>> dataset_generator.generate_dataset() In order to provide a specific directory to save the dataset, use the ``root`` argument. 
- >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec", root="./datasets/dummy") + >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec_ad", root="./datasets/dummy") >>> dataset_generator.generate_dataset() It is also possible to use the generator as a context manager. - >>> with DummyImageDatasetGenerator(data_format="mvtec", num_train=10, num_test=10) as dataset_path: + >>> with DummyImageDatasetGenerator(data_format="mvtec_ad", num_train=10, num_test=10) as dataset_path: >>> some_function() To get the list of available datasets, use the ``DataFormat`` enum. @@ -298,7 +298,7 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): def __init__( self, - data_format: DataFormat | str = "mvtec", + data_format: DataFormat | str = "mvtec_ad", root: Path | str | None = None, normal_category: str = "good", abnormal_category: str = "bad", @@ -360,7 +360,7 @@ def _generate_dummy_datumaro_dataset(self) -> None: with annotation_file.open("w") as f: json.dump(annotations, f) - def _generate_dummy_mvtec_dataset( + def _generate_dummy_mvtec_ad_dataset( self, normal_dir: str = "good", abnormal_dir: str | None = None, @@ -414,7 +414,7 @@ def _generate_dummy_folder_dataset(self) -> None: def _generate_dummy_btech_dataset(self) -> None: """Generate dummy BeanTech dataset in directory using the same convention as BeanTech AD.""" # BeanTech AD follows the same convention as MVTec AD. - self._generate_dummy_mvtec_dataset(normal_dir="ok", abnormal_dir="ko", mask_suffix="") + self._generate_dummy_mvtec_ad_dataset(normal_dir="ok", abnormal_dir="ko", mask_suffix="") def _generate_dummy_mvtec_3d_dataset(self) -> None: """Generate dummy MVTec 3D AD dataset in a temporary directory using the same convention as MVTec AD.""" @@ -466,7 +466,7 @@ def _generate_dummy_visa_dataset(self) -> None: # Visa dataset on anomalib follows the same convention as MVTec AD. # The only difference is that the root directory has a subdirectory called "visa_pytorch". 
self.dataset_root = self.dataset_root.parent / "visa_pytorch" - self._generate_dummy_mvtec_dataset(normal_dir="good", abnormal_dir="bad", image_extension=".jpg") + self._generate_dummy_mvtec_ad_dataset(normal_dir="good", abnormal_dir="bad", image_extension=".jpg") class DummyVideoDatasetGenerator(DummyDatasetGenerator): diff --git a/tests/integration/model/test_models.py b/tests/integration/model/test_models.py index 945ac65da1..528c855e21 100644 --- a/tests/integration/model/test_models.py +++ b/tests/integration/model/test_models.py @@ -202,7 +202,7 @@ def _get_objects( # EfficientAd requires that the batch size be lesser than the number of images in the dataset. # This is so that the LR step size is not 0. dataset = MVTecAD( - root=dataset_path / "mvtec", + root=dataset_path / "mvtec_ad", category="dummy", # EfficientAd requires train batch size 1 train_batch_size=1 if model_name == "efficient_ad" else 2, diff --git a/tests/unit/data/datamodule/image/test_folder.py b/tests/unit/data/datamodule/image/test_folder.py index 466ddd1e09..2f632b2b92 100644 --- a/tests/unit/data/datamodule/image/test_folder.py +++ b/tests/unit/data/datamodule/image/test_folder.py @@ -28,7 +28,7 @@ def datamodule(dataset_path: Path) -> Folder: # Create and prepare the dataset _datamodule = Folder( name="dummy", - root=dataset_path / "mvtec" / "dummy", + root=dataset_path / "mvtec_ad" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/data/datamodule/image/test_mvtec.py b/tests/unit/data/datamodule/image/test_mvtec_ad.py similarity index 95% rename from tests/unit/data/datamodule/image/test_mvtec.py rename to tests/unit/data/datamodule/image/test_mvtec_ad.py index c9bee1ac3b..7f13d9b0ad 100644 --- a/tests/unit/data/datamodule/image/test_mvtec.py +++ b/tests/unit/data/datamodule/image/test_mvtec_ad.py @@ -20,7 +20,7 @@ class TestMVTecAD(_TestAnomalibImageDatamodule): def datamodule(dataset_path: Path) -> MVTecAD: """Create and 
return a MVTec datamodule.""" _datamodule = MVTecAD( - root=dataset_path / "mvtec", + root=dataset_path / "mvtec_ad", category="dummy", train_batch_size=4, eval_batch_size=4, diff --git a/tests/unit/data/utils/test_synthetic.py b/tests/unit/data/utils/test_synthetic.py index 90360078c8..9ac861ae53 100644 --- a/tests/unit/data/utils/test_synthetic.py +++ b/tests/unit/data/utils/test_synthetic.py @@ -18,7 +18,7 @@ def folder_dataset(dataset_path: Path) -> FolderDataset: """Fixture that returns a FolderDataset instance.""" return FolderDataset( name="dummy", - root=dataset_path / "mvtec" / "dummy", + root=dataset_path / "mvtec_ad" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py index 45389c949f..cd88f78e79 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py +++ b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py @@ -50,7 +50,7 @@ def test_step(self, *_, **__) -> ImageBatch: """Only used to trigger on_test_epoch_end.""" self.log(name="loss", value=0.0, prog_bar=True) return ImageBatch( - image_path=[Path(self.dataset_path / "mvtec" / "dummy" / "train" / "good" / "000.png")], + image_path=[Path(self.dataset_path / "mvtec_ad" / "dummy" / "train" / "good" / "000.png")], image=torch.rand((1, 3, 100, 100)).to(self.device), gt_mask=torch.zeros((1, 100, 100)).to(self.device), anomaly_map=torch.ones((1, 100, 100)).to(self.device), diff --git a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py index 88f61724fb..0801d0463f 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py +++ b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py @@ -24,7 +24,7 @@ def test_add_images(dataset_path: Path) -> None: 
limit_test_batches=1, accelerator="cpu", ) - engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtec", category="dummy")) + engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtec_ad", category="dummy")) # test if images are logged assert len(list(Path(dir_loc).glob("**/*.png"))) >= 1, "Failed to save to local path" diff --git a/tests/unit/utils/test_visualizer.py b/tests/unit/utils/test_visualizer.py index 26b65585b6..97414d9554 100644 --- a/tests/unit/utils/test_visualizer.py +++ b/tests/unit/utils/test_visualizer.py @@ -50,9 +50,9 @@ def test_model_visualizer_mode( fast_dev_run=True, devices=1, ) - datamodule = MVTecAD(root=dataset_path / "mvtec", category="dummy") + datamodule = MVTecAD(root=dataset_path / "mvtec_ad", category="dummy") engine.test(model=model, datamodule=datamodule, ckpt_path=str(_ckpt_path)) - dataset = PredictDataset(path=dataset_path / "mvtec" / "dummy" / "test") + dataset = PredictDataset(path=dataset_path / "mvtec_ad" / "dummy" / "test") datamodule = DataLoader(dataset, collate_fn=ImageBatch.collate) engine.predict(model=model, dataloaders=datamodule, ckpt_path=str(_ckpt_path)) From 2199f804b7ec7e92a7870ba609886c0ba44f766b Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 11 Feb 2025 05:02:42 +0000 Subject: [PATCH 03/19] Rename mvtec path to mvtec_ad Signed-off-by: Samet Akcay --- examples/cli/01_getting_started/basic_inference.sh | 8 ++++---- examples/notebooks/400_openvino/401_nncf.ipynb | 4 ++-- .../notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb | 4 ++-- tests/unit/data/utils/test_image.py | 4 ++-- tests/unit/data/utils/test_path.py | 4 ++-- tools/inference/README.md | 4 ++-- tools/inference/gradio_inference.py | 2 +- 7 files changed, 15 insertions(+), 15 deletions(-) diff --git a/examples/cli/01_getting_started/basic_inference.sh b/examples/cli/01_getting_started/basic_inference.sh index c3369d5919..eb37c22851 100644 --- a/examples/cli/01_getting_started/basic_inference.sh +++ 
b/examples/cli/01_getting_started/basic_inference.sh @@ -12,14 +12,14 @@ echo "=== Anomalib Inference Examples ===" echo -e "\n1. Basic Inference with Checkpoint Path" echo "# Predict using a model checkpoint" anomalib predict \ - --ckpt_path "./results/efficient_ad/mvtec/bottle/weights/model.ckpt" \ + --ckpt_path "./results/efficient_ad/mvtec_ad/bottle/weights/model.ckpt" \ --data_path path/to/image.jpg echo -e "\n2. Inference with Directory Path" echo "# Predict on all images in a directory" anomalib predict \ - --ckpt_path "./results/efficient_ad/mvtec/bottle/weights/model.ckpt" \ - --data_path "./datasets/mvtec/bottle/test" + --ckpt_path "./results/efficient_ad/mvtec_ad/bottle/weights/model.ckpt" \ + --data_path "./datasets/mvtec_ad/bottle/test" echo -e "\n3. Inference with Datamodule" echo "# Use a datamodule for inference" @@ -34,7 +34,7 @@ anomalib predict \ echo -e "\n4. Inference with Return Predictions" echo "# Return predictions instead of saving to disk" anomalib predict \ - --ckpt_path "./results/efficient_ad/mvtec/bottle/weights/model.ckpt" \ + --ckpt_path "./results/efficient_ad/mvtec_ad/bottle/weights/model.ckpt" \ --data_path path/to/image.jpg \ --return_predictions diff --git a/examples/notebooks/400_openvino/401_nncf.ipynb b/examples/notebooks/400_openvino/401_nncf.ipynb index 03404aed5f..39ec4157f1 100644 --- a/examples/notebooks/400_openvino/401_nncf.ipynb +++ b/examples/notebooks/400_openvino/401_nncf.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:be7ba7a7ab976549fa567f267ae7b0d904142fc3fb862a159c03dd079f08b35e -size 12017 +oid sha256:df1407966f53ba2671923bb639ca786f9a9952a562149342aa0520cb03a4976f +size 12023 diff --git a/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb b/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb index 89b319762a..c98cd3d2e9 100644 --- a/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb +++ b/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb @@ 
-1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:83fd473e527968ff540f506b2e63f013e4216e57f488fb1f1e6c8f8ef5632572 -size 378250 +oid sha256:ec52796450b3c38044a387f9d4ad05d02406fa5d174ee3269da7b81595105cdd +size 378313 diff --git a/tests/unit/data/utils/test_image.py b/tests/unit/data/utils/test_image.py index 00cff13edd..0f636b5523 100644 --- a/tests/unit/data/utils/test_image.py +++ b/tests/unit/data/utils/test_image.py @@ -16,14 +16,14 @@ class TestGetImageFilenames: @staticmethod def test_existing_image_file(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct path for an existing image file.""" - image_path = dataset_path / "mvtec/dummy/train/good/000.png" + image_path = dataset_path / "mvtec_ad/dummy/train/good/000.png" image_filenames = get_image_filenames(image_path) assert image_filenames == [image_path.resolve()] @staticmethod def test_existing_image_directory(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct image filenames from an existing directory.""" - directory_path = dataset_path / "mvtec/dummy/train/good" + directory_path = dataset_path / "mvtec_ad/dummy/train/good" image_filenames = get_image_filenames(directory_path) expected_filenames = [(directory_path / f"{i:03d}.png").resolve() for i in range(5)] assert set(image_filenames) == set(expected_filenames) diff --git a/tests/unit/data/utils/test_path.py b/tests/unit/data/utils/test_path.py index 09f88496ad..6b3d70affb 100644 --- a/tests/unit/data/utils/test_path.py +++ b/tests/unit/data/utils/test_path.py @@ -35,14 +35,14 @@ def test_contains_non_printable_characters() -> None: @staticmethod def test_existing_file_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing file within the base directory.""" - file_path = dataset_path / "mvtec/dummy/train/good/000.png" + file_path = dataset_path / "mvtec_ad/dummy/train/good/000.png" validated_path = 
validate_path(file_path, base_dir=dataset_path) assert validated_path == file_path.resolve() @staticmethod def test_existing_directory_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing directory within the base directory.""" - directory_path = dataset_path / "mvtec/dummy/train/good" + directory_path = dataset_path / "mvtec_ad/dummy/train/good" validated_path = validate_path(directory_path, base_dir=dataset_path) assert validated_path == directory_path.resolve() diff --git a/tools/inference/README.md b/tools/inference/README.md index 737a297684..f8b535d134 100644 --- a/tools/inference/README.md +++ b/tools/inference/README.md @@ -13,7 +13,7 @@ Example: ```bash python tools/inference/lightning_inference.py \ --model anomalib.models.Padim \ - --ckpt_path results/padim/mvtec/bottle/weights/lightning/model.ckpt \ + --ckpt_path results/padim/mvtec_ad/bottle/weights/lightning/model.ckpt \ --data.path datasets/MVTec/bottle/test/broken_large \ --output ./outputs ``` @@ -23,7 +23,7 @@ You can also use a config file with the entrypoint Here is a simple YAML file for Padim Model. ```yaml -ckpt_path: results/padim/mvtec/bottle/weights/lightning/model.ckpt +ckpt_path: results/padim/mvtec_ad/bottle/weights/lightning/model.ckpt data: path: datasets/MVTec/bottle/test/broken_large transform: null diff --git a/tools/inference/gradio_inference.py b/tools/inference/gradio_inference.py index 474bb23ef6..344bfe3445 100644 --- a/tools/inference/gradio_inference.py +++ b/tools/inference/gradio_inference.py @@ -22,7 +22,7 @@ def get_parser() -> ArgumentParser: Example: Example for Torch Inference. >>> python tools/inference/gradio_inference.py \ - ... --weights ./results/padim/mvtec/bottle/weights/torch/model.pt + ... --weights ./results/padim/mvtec_ad/bottle/weights/torch/model.pt Returns: ArgumentParser: Argument parser for gradio inference. 
From 49e7d35efc00621baad83826d66ec538b2ac50a2 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 11 Feb 2025 12:57:54 +0000 Subject: [PATCH 04/19] Rename mvtec_ad to mvtecad Signed-off-by: Samet Akcay --- .../cli/01_getting_started/basic_inference.sh | 8 +- .../notebooks/400_openvino/401_nncf.ipynb | 4 +- .../700_metrics/701e_aupimo_advanced_iv.ipynb | 4 +- .../data/datamodules/image/__init__.py | 4 +- .../data/datamodules/image/mvtec_ad.py | 236 ----------------- .../image/{mvtec.py => mvtecad.py} | 4 +- src/anomalib/data/datasets/image/__init__.py | 3 +- src/anomalib/data/datasets/image/mvtec.py | 240 ------------------ .../image/{mvtec_ad.py => mvtecad.py} | 0 src/anomalib/data/datasets/image/visa.py | 2 +- src/anomalib/utils/config.py | 2 +- tests/conftest.py | 2 +- tests/helpers/data.py | 14 +- tests/integration/model/test_models.py | 2 +- .../unit/data/datamodule/image/test_folder.py | 2 +- .../data/datamodule/image/test_mvtec_ad.py | 2 +- tests/unit/data/utils/test_image.py | 4 +- tests/unit/data/utils/test_path.py | 4 +- tests/unit/data/utils/test_synthetic.py | 2 +- .../dummy_lightning_model.py | 2 +- .../visualizer_callback/test_visualizer.py | 2 +- tests/unit/utils/test_visualizer.py | 4 +- tools/inference/README.md | 4 +- tools/inference/gradio_inference.py | 2 +- 24 files changed, 38 insertions(+), 515 deletions(-) delete mode 100644 src/anomalib/data/datamodules/image/mvtec_ad.py rename src/anomalib/data/datamodules/image/{mvtec.py => mvtecad.py} (98%) delete mode 100644 src/anomalib/data/datasets/image/mvtec.py rename src/anomalib/data/datasets/image/{mvtec_ad.py => mvtecad.py} (100%) diff --git a/examples/cli/01_getting_started/basic_inference.sh b/examples/cli/01_getting_started/basic_inference.sh index eb37c22851..7f602934ce 100644 --- a/examples/cli/01_getting_started/basic_inference.sh +++ b/examples/cli/01_getting_started/basic_inference.sh @@ -12,14 +12,14 @@ echo "=== Anomalib Inference Examples ===" echo -e "\n1. 
Basic Inference with Checkpoint Path" echo "# Predict using a model checkpoint" anomalib predict \ - --ckpt_path "./results/efficient_ad/mvtec_ad/bottle/weights/model.ckpt" \ + --ckpt_path "./results/efficient_ad/mvtecad/bottle/weights/model.ckpt" \ --data_path path/to/image.jpg echo -e "\n2. Inference with Directory Path" echo "# Predict on all images in a directory" anomalib predict \ - --ckpt_path "./results/efficient_ad/mvtec_ad/bottle/weights/model.ckpt" \ - --data_path "./datasets/mvtec_ad/bottle/test" + --ckpt_path "./results/efficient_ad/mvtecad/bottle/weights/model.ckpt" \ + --data_path "./datasets/mvtecad/bottle/test" echo -e "\n3. Inference with Datamodule" echo "# Use a datamodule for inference" @@ -34,7 +34,7 @@ anomalib predict \ echo -e "\n4. Inference with Return Predictions" echo "# Return predictions instead of saving to disk" anomalib predict \ - --ckpt_path "./results/efficient_ad/mvtec_ad/bottle/weights/model.ckpt" \ + --ckpt_path "./results/efficient_ad/mvtecad/bottle/weights/model.ckpt" \ --data_path path/to/image.jpg \ --return_predictions diff --git a/examples/notebooks/400_openvino/401_nncf.ipynb b/examples/notebooks/400_openvino/401_nncf.ipynb index 39ec4157f1..e51225b0c9 100644 --- a/examples/notebooks/400_openvino/401_nncf.ipynb +++ b/examples/notebooks/400_openvino/401_nncf.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df1407966f53ba2671923bb639ca786f9a9952a562149342aa0520cb03a4976f -size 12023 +oid sha256:6f8cd393444c09428f7d93961064600dd97ca1cd2584e2784b7a07df61759b2f +size 12021 diff --git a/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb b/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb index c98cd3d2e9..946b8201ae 100644 --- a/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb +++ b/examples/notebooks/700_metrics/701e_aupimo_advanced_iv.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:ec52796450b3c38044a387f9d4ad05d02406fa5d174ee3269da7b81595105cdd -size 378313 +oid sha256:224b7e86d9949347250128d120bb5f32e8e114b8d4c89f9d57361b29ca2ff1ca +size 378292 diff --git a/src/anomalib/data/datamodules/image/__init__.py b/src/anomalib/data/datamodules/image/__init__.py index 629fac9f9e..f817d4c948 100644 --- a/src/anomalib/data/datamodules/image/__init__.py +++ b/src/anomalib/data/datamodules/image/__init__.py @@ -29,7 +29,7 @@ from .datumaro import Datumaro from .folder import Folder from .kolektor import Kolektor -from .mvtec import MVTec, MVTecAD # MVTec is an alias for backward compatibility +from .mvtecad import MVTec, MVTecAD from .visa import Visa @@ -53,7 +53,7 @@ class ImageDataFormat(str, Enum): FOLDER = "folder" FOLDER_3D = "folder_3d" KOLEKTOR = "kolektor" - MVTEC_AD = "mvtec_ad" + MVTEC_AD = "mvtecad" MVTEC_3D = "mvtec_3d" VISA = "visa" diff --git a/src/anomalib/data/datamodules/image/mvtec_ad.py b/src/anomalib/data/datamodules/image/mvtec_ad.py deleted file mode 100644 index 293fce1966..0000000000 --- a/src/anomalib/data/datamodules/image/mvtec_ad.py +++ /dev/null @@ -1,236 +0,0 @@ -"""MVTec AD Data Module. - -This module provides a PyTorch Lightning DataModule for the MVTec AD dataset. If -the dataset is not available locally, it will be downloaded and extracted -automatically. - -Example: - Create a MVTec AD datamodule:: - - >>> from anomalib.data import MVTecAD - >>> datamodule = MVTecAD( - ... root="./datasets/MVTecAD", - ... category="bottle" - ... ) - -Notes: - The dataset will be automatically downloaded and converted to the required - format when first used. The directory structure after preparation will be:: - - datasets/ - └── MVTecAD/ - β”œβ”€β”€ bottle/ - β”œβ”€β”€ cable/ - └── ... - -License: - MVTec AD dataset is released under the Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International License - (CC BY-NC-SA 4.0). 
- https://creativecommons.org/licenses/by-nc-sa/4.0/ - -Reference: - Paul Bergmann, Kilian Batzner, Michael Fauser, David Sattlegger, - Carsten Steger: The MVTec Anomaly Detection Dataset: A Comprehensive - Real-World Dataset for Unsupervised Anomaly Detection; in: International - Journal of Computer Vision 129(4):1038-1059, 2021, - DOI: 10.1007/s11263-020-01400-4. - - Paul Bergmann, Michael Fauser, David Sattlegger, Carsten Steger: MVTec AD β€” - A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection; - in: IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), - 9584-9592, 2019, DOI: 10.1109/CVPR.2019.00982. -""" - -# Copyright (C) 2022-2025 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -import logging -from pathlib import Path - -from torchvision.transforms.v2 import Transform - -from anomalib.data.datamodules.base.image import AnomalibDataModule -from anomalib.data.datasets.image.mvtec_ad import MVTecADDataset -from anomalib.data.utils import DownloadInfo, Split, TestSplitMode, ValSplitMode, download_and_extract - -logger = logging.getLogger(__name__) - - -DOWNLOAD_INFO = DownloadInfo( - name="mvtec_ad", - url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/" - "download/420938113-1629952094/mvtec_anomaly_detection.tar.xz", - hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d", -) - - -class MVTecAD(AnomalibDataModule): - """MVTec AD Datamodule. - - Args: - root (Path | str): Path to the root of the dataset. - Defaults to ``"./datasets/MVTecAD"``. - category (str): Category of the MVTec AD dataset (e.g. ``"bottle"`` or - ``"cable"``). Defaults to ``"bottle"``. - train_batch_size (int, optional): Training batch size. - Defaults to ``32``. - eval_batch_size (int, optional): Test batch size. - Defaults to ``32``. - num_workers (int, optional): Number of workers. - Defaults to ``8``. 
- train_augmentations (Transform | None): Augmentations to apply dto the training images - Defaults to ``None``. - val_augmentations (Transform | None): Augmentations to apply to the validation images. - Defaults to ``None``. - test_augmentations (Transform | None): Augmentations to apply to the test images. - Defaults to ``None``. - augmentations (Transform | None): General augmentations to apply if stage-specific - augmentations are not provided. - test_split_mode (TestSplitMode): Method to create test set. - Defaults to ``TestSplitMode.FROM_DIR``. - test_split_ratio (float): Fraction of data to use for testing. - Defaults to ``0.2``. - val_split_mode (ValSplitMode): Method to create validation set. - Defaults to ``ValSplitMode.SAME_AS_TEST``. - val_split_ratio (float): Fraction of data to use for validation. - Defaults to ``0.5``. - seed (int | None, optional): Seed for reproducibility. - Defaults to ``None``. - - Example: - Create MVTec AD datamodule with default settings:: - - >>> datamodule = MVTecAD() - >>> datamodule.setup() - >>> i, data = next(enumerate(datamodule.train_dataloader())) - >>> data.keys() - dict_keys(['image_path', 'label', 'image', 'mask_path', 'mask']) - - >>> data["image"].shape - torch.Size([32, 3, 256, 256]) - - Change the category:: - - >>> datamodule = MVTecAD(category="cable") - - Create validation set from test data:: - - >>> datamodule = MVTecAD( - ... val_split_mode=ValSplitMode.FROM_TEST, - ... val_split_ratio=0.1 - ... ) - - Create synthetic validation set:: - - >>> datamodule = MVTecAD( - ... val_split_mode=ValSplitMode.SYNTHETIC, - ... val_split_ratio=0.2 - ... 
) - """ - - def __init__( - self, - root: Path | str = "./datasets/MVTecAD", - category: str = "bottle", - train_batch_size: int = 32, - eval_batch_size: int = 32, - num_workers: int = 8, - train_augmentations: Transform | None = None, - val_augmentations: Transform | None = None, - test_augmentations: Transform | None = None, - augmentations: Transform | None = None, - test_split_mode: TestSplitMode | str = TestSplitMode.FROM_DIR, - test_split_ratio: float = 0.2, - val_split_mode: ValSplitMode | str = ValSplitMode.SAME_AS_TEST, - val_split_ratio: float = 0.5, - seed: int | None = None, - ) -> None: - super().__init__( - train_batch_size=train_batch_size, - eval_batch_size=eval_batch_size, - num_workers=num_workers, - train_augmentations=train_augmentations, - val_augmentations=val_augmentations, - test_augmentations=test_augmentations, - augmentations=augmentations, - test_split_mode=test_split_mode, - test_split_ratio=test_split_ratio, - val_split_mode=val_split_mode, - val_split_ratio=val_split_ratio, - seed=seed, - ) - - self.root = Path(root) - self.category = category - - def _setup(self, _stage: str | None = None) -> None: - """Set up the datasets and perform dynamic subset splitting. - - This method may be overridden in subclass for custom splitting behaviour. - - Note: - The stage argument is not used here. This is because, for a given - instance of an AnomalibDataModule subclass, all three subsets are - created at the first call of setup(). This is to accommodate the - subset splitting behaviour of anomaly tasks, where the validation set - is usually extracted from the test set, and the test set must - therefore be created as early as the `fit` stage. - """ - self.train_data = MVTecADDataset( - split=Split.TRAIN, - root=self.root, - category=self.category, - ) - self.test_data = MVTecADDataset( - split=Split.TEST, - root=self.root, - category=self.category, - ) - - def prepare_data(self) -> None: - """Download the dataset if not available. 
- - This method checks if the specified dataset is available in the file - system. If not, it downloads and extracts the dataset into the - appropriate directory. - - Example: - Assume the dataset is not available on the file system:: - - >>> datamodule = MVTecAD( - ... root="./datasets/MVTecAD", - ... category="bottle" - ... ) - >>> datamodule.prepare_data() - - Directory structure after download:: - - datasets/ - └── MVTecAD/ - β”œβ”€β”€ bottle/ - β”œβ”€β”€ cable/ - └── ... - """ - if (self.root / self.category).is_dir(): - logger.info("Found the dataset.") - else: - download_and_extract(self.root, DOWNLOAD_INFO) - - -class MVTec(MVTecAD): - """MVTec datamodule class (Deprecated). - - This class is deprecated and will be removed in a future version. - Please use MVTecAD instead. - """ - - def __init__(self, *args, **kwargs) -> None: - import warnings - - warnings.warn( - "MVTec is deprecated and will be removed in a future version. Please use MVTecAD instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) diff --git a/src/anomalib/data/datamodules/image/mvtec.py b/src/anomalib/data/datamodules/image/mvtecad.py similarity index 98% rename from src/anomalib/data/datamodules/image/mvtec.py rename to src/anomalib/data/datamodules/image/mvtecad.py index 293fce1966..53da172f60 100644 --- a/src/anomalib/data/datamodules/image/mvtec.py +++ b/src/anomalib/data/datamodules/image/mvtecad.py @@ -51,14 +51,14 @@ from torchvision.transforms.v2 import Transform from anomalib.data.datamodules.base.image import AnomalibDataModule -from anomalib.data.datasets.image.mvtec_ad import MVTecADDataset +from anomalib.data.datasets.image.mvtecad import MVTecADDataset from anomalib.data.utils import DownloadInfo, Split, TestSplitMode, ValSplitMode, download_and_extract logger = logging.getLogger(__name__) DOWNLOAD_INFO = DownloadInfo( - name="mvtec_ad", + name="mvtecad", url="https://www.mydrive.ch/shares/38536/3830184030e49fe74747669442f0f282/" 
"download/420938113-1629952094/mvtec_anomaly_detection.tar.xz", hashsum="cf4313b13603bec67abb49ca959488f7eedce2a9f7795ec54446c649ac98cd3d", diff --git a/src/anomalib/data/datasets/image/__init__.py b/src/anomalib/data/datasets/image/__init__.py index 89832754a4..8211fdf25b 100644 --- a/src/anomalib/data/datasets/image/__init__.py +++ b/src/anomalib/data/datasets/image/__init__.py @@ -26,8 +26,7 @@ from .datumaro import DatumaroDataset from .folder import FolderDataset from .kolektor import KolektorDataset -from .mvtec import MVTecDataset -from .mvtec_ad import MVTecADDataset +from .mvtecad import MVTecADDataset, MVTecDataset from .visa import VisaDataset __all__ = [ diff --git a/src/anomalib/data/datasets/image/mvtec.py b/src/anomalib/data/datasets/image/mvtec.py deleted file mode 100644 index 2d41931551..0000000000 --- a/src/anomalib/data/datasets/image/mvtec.py +++ /dev/null @@ -1,240 +0,0 @@ -"""MVTec AD Dataset. - -This module provides PyTorch Dataset implementation for the MVTec AD dataset. The -dataset will be downloaded and extracted automatically if not found locally. - -The dataset contains 15 categories of industrial objects with both normal and -anomalous samples. Each category includes RGB images and pixel-level ground truth -masks for anomaly segmentation. - -License: - MVTec AD dataset is released under the Creative Commons - Attribution-NonCommercial-ShareAlike 4.0 International License - (CC BY-NC-SA 4.0) https://creativecommons.org/licenses/by-nc-sa/4.0/ - -Reference: - Bergmann, P., Batzner, K., Fauser, M., Sattlegger, D., & Steger, C. (2021). - The MVTec Anomaly Detection Dataset: A Comprehensive Real-World Dataset for - Unsupervised Anomaly Detection. International Journal of Computer Vision, - 129(4), 1038-1059. - - Bergmann, P., Fauser, M., Sattlegger, D., & Steger, C. (2019). MVTec AD β€” - A Comprehensive Real-World Dataset for Unsupervised Anomaly Detection. 
In - IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR), - 9584-9592. -""" - -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -from collections.abc import Sequence -from pathlib import Path - -from pandas import DataFrame -from torchvision.transforms.v2 import Transform - -from anomalib.data.datasets.base import AnomalibDataset -from anomalib.data.errors import MisMatchError -from anomalib.data.utils import LabelName, Split, validate_path - -IMG_EXTENSIONS = (".png", ".PNG") -CATEGORIES = ( - "bottle", - "cable", - "capsule", - "carpet", - "grid", - "hazelnut", - "leather", - "metal_nut", - "pill", - "screw", - "tile", - "toothbrush", - "transistor", - "wood", - "zipper", -) - - -class MVTecADDataset(AnomalibDataset): - """MVTec AD dataset class. - - Dataset class for loading and processing MVTec AD dataset images. Supports - both classification and segmentation tasks. - - Args: - root (Path | str): Path to root directory containing the dataset. - Defaults to ``"./datasets/MVTec"``. - category (str): Category name, must be one of ``CATEGORIES``. - Defaults to ``"bottle"``. - augmentations (Transform, optional): Augmentations that should be applied to the input images. - Defaults to ``None``. - split (str | Split | None, optional): Dataset split - usually - ``Split.TRAIN`` or ``Split.TEST``. Defaults to ``None``. - - Example: - >>> from pathlib import Path - >>> from anomalib.data.datasets import MVTecADDataset - >>> dataset = MVTecADDataset( - ... root=Path("./datasets/MVTec"), - ... category="bottle", - ... split="train" - ... 
) - - For classification tasks, each sample contains: - - >>> sample = dataset[0] - >>> list(sample.keys()) - ['image_path', 'label', 'image'] - - For segmentation tasks, samples also include mask paths and masks: - - >>> dataset.task = "segmentation" - >>> sample = dataset[0] - >>> list(sample.keys()) - ['image_path', 'label', 'image', 'mask_path', 'mask'] - - Images are PyTorch tensors with shape ``(C, H, W)``, masks have shape - ``(H, W)``: - - >>> sample["image"].shape, sample["mask"].shape - (torch.Size([3, 256, 256]), torch.Size([256, 256])) - """ - - def __init__( - self, - root: Path | str = "./datasets/MVTec", - category: str = "bottle", - augmentations: Transform | None = None, - split: str | Split | None = None, - ) -> None: - super().__init__(augmentations=augmentations) - - self.root_category = Path(root) / Path(category) - self.category = category - self.split = split - self.samples = make_mvtec_dataset( - self.root_category, - split=self.split, - extensions=IMG_EXTENSIONS, - ) - - -def make_mvtec_dataset( - root: str | Path, - split: str | Split | None = None, - extensions: Sequence[str] | None = None, -) -> DataFrame: - """Create MVTec AD samples by parsing the data directory structure. - - The files are expected to follow the structure: - ``path/to/dataset/split/category/image_filename.png`` - ``path/to/dataset/ground_truth/category/mask_filename.png`` - - Args: - root (Path | str): Path to dataset root directory - split (str | Split | None, optional): Dataset split (train or test) - Defaults to ``None``. - extensions (Sequence[str] | None, optional): Valid file extensions - Defaults to ``None``. 
- - Returns: - DataFrame: Dataset samples with columns: - - path: Base path to dataset - - split: Dataset split (train/test) - - label: Class label - - image_path: Path to image file - - mask_path: Path to mask file (if available) - - label_index: Numeric label (0=normal, 1=abnormal) - - Example: - >>> root = Path("./datasets/MVTec/bottle") - >>> samples = make_mvtec_dataset(root, split="train") - >>> samples.head() - path split label image_path mask_path label_index - 0 datasets/MVTec/bottle train good [...]/good/105.png 0 - 1 datasets/MVTec/bottle train good [...]/good/017.png 0 - - Raises: - RuntimeError: If no valid images are found - MisMatchError: If anomalous images and masks don't match - """ - if extensions is None: - extensions = IMG_EXTENSIONS - - root = validate_path(root) - samples_list = [(str(root),) + f.parts[-3:] for f in root.glob(r"**/*") if f.suffix in extensions] - if not samples_list: - msg = f"Found 0 images in {root}" - raise RuntimeError(msg) - - samples = DataFrame(samples_list, columns=["path", "split", "label", "image_path"]) - - # Modify image_path column by converting to absolute path - samples["image_path"] = samples.path + "/" + samples.split + "/" + samples.label + "/" + samples.image_path - - # Create label index for normal (0) and anomalous (1) images. 
- samples.loc[(samples.label == "good"), "label_index"] = LabelName.NORMAL - samples.loc[(samples.label != "good"), "label_index"] = LabelName.ABNORMAL - samples.label_index = samples.label_index.astype(int) - - # separate masks from samples - mask_samples = samples.loc[samples.split == "ground_truth"].sort_values( - by="image_path", - ignore_index=True, - ) - samples = samples[samples.split != "ground_truth"].sort_values( - by="image_path", - ignore_index=True, - ) - - # assign mask paths to anomalous test images - samples["mask_path"] = "" - samples.loc[ - (samples.split == "test") & (samples.label_index == LabelName.ABNORMAL), - "mask_path", - ] = mask_samples.image_path.to_numpy() - - # assert that the right mask files are associated with the right test images - abnormal_samples = samples.loc[samples.label_index == LabelName.ABNORMAL] - if ( - len(abnormal_samples) - and not abnormal_samples.apply( - lambda x: Path(x.image_path).stem in Path(x.mask_path).stem, - axis=1, - ).all() - ): - msg = ( - "Mismatch between anomalous images and ground truth masks. Make sure " - "mask files in 'ground_truth' folder follow the same naming " - "convention as the anomalous images (e.g. image: '000.png', " - "mask: '000.png' or '000_mask.png')." - ) - raise MisMatchError(msg) - - # infer the task type - samples.attrs["task"] = "classification" if (samples["mask_path"] == "").all() else "segmentation" - - if split: - samples = samples[samples.split == split].reset_index(drop=True) - - return samples - - -class MVTecDataset(MVTecADDataset): - """MVTec dataset class (Deprecated). - - This class is deprecated and will be removed in a future version. - Please use MVTecADDataset instead. - """ - - def __init__(self, *args, **kwargs) -> None: - import warnings - - warnings.warn( - "MVTecDataset is deprecated and will be removed in a future version. 
Please use MVTecADDataset instead.", - DeprecationWarning, - stacklevel=2, - ) - super().__init__(*args, **kwargs) diff --git a/src/anomalib/data/datasets/image/mvtec_ad.py b/src/anomalib/data/datasets/image/mvtecad.py similarity index 100% rename from src/anomalib/data/datasets/image/mvtec_ad.py rename to src/anomalib/data/datasets/image/mvtecad.py diff --git a/src/anomalib/data/datasets/image/visa.py b/src/anomalib/data/datasets/image/visa.py index af054324e4..ffcc351381 100644 --- a/src/anomalib/data/datasets/image/visa.py +++ b/src/anomalib/data/datasets/image/visa.py @@ -28,7 +28,7 @@ from torchvision.transforms.v2 import Transform from anomalib.data.datasets import AnomalibDataset -from anomalib.data.datasets.image.mvtec_ad import make_mvtec_dataset +from anomalib.data.datasets.image.mvtecad import make_mvtec_dataset from anomalib.data.utils import Split EXTENSIONS = (".png", ".jpg", ".JPG") diff --git a/src/anomalib/utils/config.py b/src/anomalib/utils/config.py index 5a867c55bc..235504715b 100644 --- a/src/anomalib/utils/config.py +++ b/src/anomalib/utils/config.py @@ -128,7 +128,7 @@ def to_yaml(config: Namespace | ListConfig | DictConfig) -> str: Examples: >>> from omegaconf import DictConfig - >>> config = DictConfig({"model": "padim", "dataset": {"name": "mvtec_ad"}}) + >>> config = DictConfig({"model": "padim", "dataset": {"name": "mvtecad"}}) >>> yaml_str = to_yaml(config) >>> print(yaml_str) model: padim diff --git a/tests/conftest.py b/tests/conftest.py index 560a6b9ef9..bdb35f3dbb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTecAD(root=dataset_path / "mvtec_ad", category="dummy") + dataset = MVTecAD(root=dataset_path / "mvtecad", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 718aad1785..dca613a989 100644 --- a/tests/helpers/data.py +++ 
b/tests/helpers/data.py @@ -277,15 +277,15 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): Examples: To create an MVTecAD dataset with 10 training images and 10 testing images per category, use the following code. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec_ad", num_train=10, num_test=10) + >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtecad", num_train=10, num_test=10) >>> dataset_generator.generate_dataset() In order to provide a specific directory to save the dataset, use the ``root`` argument. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtec_ad", root="./datasets/dummy") + >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtecad", root="./datasets/dummy") >>> dataset_generator.generate_dataset() It is also possible to use the generator as a context manager. - >>> with DummyImageDatasetGenerator(data_format="mvtec_ad", num_train=10, num_test=10) as dataset_path: + >>> with DummyImageDatasetGenerator(data_format="mvtecad", num_train=10, num_test=10) as dataset_path: >>> some_function() To get the list of available datasets, use the ``DataFormat`` enum. @@ -298,7 +298,7 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): def __init__( self, - data_format: DataFormat | str = "mvtec_ad", + data_format: DataFormat | str = "mvtecad", root: Path | str | None = None, normal_category: str = "good", abnormal_category: str = "bad", @@ -360,7 +360,7 @@ def _generate_dummy_datumaro_dataset(self) -> None: with annotation_file.open("w") as f: json.dump(annotations, f) - def _generate_dummy_mvtec_ad_dataset( + def _generate_dummy_mvtecad_dataset( self, normal_dir: str = "good", abnormal_dir: str | None = None, @@ -414,7 +414,7 @@ def _generate_dummy_folder_dataset(self) -> None: def _generate_dummy_btech_dataset(self) -> None: """Generate dummy BeanTech dataset in directory using the same convention as BeanTech AD.""" # BeanTech AD follows the same convention as MVTec AD. 
- self._generate_dummy_mvtec_ad_dataset(normal_dir="ok", abnormal_dir="ko", mask_suffix="") + self._generate_dummy_mvtecad_dataset(normal_dir="ok", abnormal_dir="ko", mask_suffix="") def _generate_dummy_mvtec_3d_dataset(self) -> None: """Generate dummy MVTec 3D AD dataset in a temporary directory using the same convention as MVTec AD.""" @@ -466,7 +466,7 @@ def _generate_dummy_visa_dataset(self) -> None: # Visa dataset on anomalib follows the same convention as MVTec AD. # The only difference is that the root directory has a subdirectory called "visa_pytorch". self.dataset_root = self.dataset_root.parent / "visa_pytorch" - self._generate_dummy_mvtec_ad_dataset(normal_dir="good", abnormal_dir="bad", image_extension=".jpg") + self._generate_dummy_mvtecad_dataset(normal_dir="good", abnormal_dir="bad", image_extension=".jpg") class DummyVideoDatasetGenerator(DummyDatasetGenerator): diff --git a/tests/integration/model/test_models.py b/tests/integration/model/test_models.py index 528c855e21..ac875796a9 100644 --- a/tests/integration/model/test_models.py +++ b/tests/integration/model/test_models.py @@ -202,7 +202,7 @@ def _get_objects( # EfficientAd requires that the batch size be lesser than the number of images in the dataset. # This is so that the LR step size is not 0. 
dataset = MVTecAD( - root=dataset_path / "mvtec_ad", + root=dataset_path / "mvtecad", category="dummy", # EfficientAd requires train batch size 1 train_batch_size=1 if model_name == "efficient_ad" else 2, diff --git a/tests/unit/data/datamodule/image/test_folder.py b/tests/unit/data/datamodule/image/test_folder.py index 2f632b2b92..7ecb2c0e64 100644 --- a/tests/unit/data/datamodule/image/test_folder.py +++ b/tests/unit/data/datamodule/image/test_folder.py @@ -28,7 +28,7 @@ def datamodule(dataset_path: Path) -> Folder: # Create and prepare the dataset _datamodule = Folder( name="dummy", - root=dataset_path / "mvtec_ad" / "dummy", + root=dataset_path / "mvtecad" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/data/datamodule/image/test_mvtec_ad.py b/tests/unit/data/datamodule/image/test_mvtec_ad.py index 7f13d9b0ad..89f973c5bb 100644 --- a/tests/unit/data/datamodule/image/test_mvtec_ad.py +++ b/tests/unit/data/datamodule/image/test_mvtec_ad.py @@ -20,7 +20,7 @@ class TestMVTecAD(_TestAnomalibImageDatamodule): def datamodule(dataset_path: Path) -> MVTecAD: """Create and return a MVTec datamodule.""" _datamodule = MVTecAD( - root=dataset_path / "mvtec_ad", + root=dataset_path / "mvtecad", category="dummy", train_batch_size=4, eval_batch_size=4, diff --git a/tests/unit/data/utils/test_image.py b/tests/unit/data/utils/test_image.py index 0f636b5523..ac3ab1def3 100644 --- a/tests/unit/data/utils/test_image.py +++ b/tests/unit/data/utils/test_image.py @@ -16,14 +16,14 @@ class TestGetImageFilenames: @staticmethod def test_existing_image_file(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct path for an existing image file.""" - image_path = dataset_path / "mvtec_ad/dummy/train/good/000.png" + image_path = dataset_path / "mvtecad/dummy/train/good/000.png" image_filenames = get_image_filenames(image_path) assert image_filenames == [image_path.resolve()] @staticmethod def 
test_existing_image_directory(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct image filenames from an existing directory.""" - directory_path = dataset_path / "mvtec_ad/dummy/train/good" + directory_path = dataset_path / "mvtecad/dummy/train/good" image_filenames = get_image_filenames(directory_path) expected_filenames = [(directory_path / f"{i:03d}.png").resolve() for i in range(5)] assert set(image_filenames) == set(expected_filenames) diff --git a/tests/unit/data/utils/test_path.py b/tests/unit/data/utils/test_path.py index 6b3d70affb..55a6f8b062 100644 --- a/tests/unit/data/utils/test_path.py +++ b/tests/unit/data/utils/test_path.py @@ -35,14 +35,14 @@ def test_contains_non_printable_characters() -> None: @staticmethod def test_existing_file_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing file within the base directory.""" - file_path = dataset_path / "mvtec_ad/dummy/train/good/000.png" + file_path = dataset_path / "mvtecad/dummy/train/good/000.png" validated_path = validate_path(file_path, base_dir=dataset_path) assert validated_path == file_path.resolve() @staticmethod def test_existing_directory_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing directory within the base directory.""" - directory_path = dataset_path / "mvtec_ad/dummy/train/good" + directory_path = dataset_path / "mvtecad/dummy/train/good" validated_path = validate_path(directory_path, base_dir=dataset_path) assert validated_path == directory_path.resolve() diff --git a/tests/unit/data/utils/test_synthetic.py b/tests/unit/data/utils/test_synthetic.py index 9ac861ae53..cc8a539e79 100644 --- a/tests/unit/data/utils/test_synthetic.py +++ b/tests/unit/data/utils/test_synthetic.py @@ -18,7 +18,7 @@ def folder_dataset(dataset_path: Path) -> FolderDataset: """Fixture that returns a FolderDataset instance.""" return FolderDataset( 
name="dummy", - root=dataset_path / "mvtec_ad" / "dummy", + root=dataset_path / "mvtecad" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py index cd88f78e79..dfbee026e7 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py +++ b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py @@ -50,7 +50,7 @@ def test_step(self, *_, **__) -> ImageBatch: """Only used to trigger on_test_epoch_end.""" self.log(name="loss", value=0.0, prog_bar=True) return ImageBatch( - image_path=[Path(self.dataset_path / "mvtec_ad" / "dummy" / "train" / "good" / "000.png")], + image_path=[Path(self.dataset_path / "mvtecad" / "dummy" / "train" / "good" / "000.png")], image=torch.rand((1, 3, 100, 100)).to(self.device), gt_mask=torch.zeros((1, 100, 100)).to(self.device), anomaly_map=torch.ones((1, 100, 100)).to(self.device), diff --git a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py index 0801d0463f..07f7c71b1a 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py +++ b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py @@ -24,7 +24,7 @@ def test_add_images(dataset_path: Path) -> None: limit_test_batches=1, accelerator="cpu", ) - engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtec_ad", category="dummy")) + engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtecad", category="dummy")) # test if images are logged assert len(list(Path(dir_loc).glob("**/*.png"))) >= 1, "Failed to save to local path" diff --git a/tests/unit/utils/test_visualizer.py b/tests/unit/utils/test_visualizer.py index 97414d9554..7476cf4943 100644 --- a/tests/unit/utils/test_visualizer.py +++ 
b/tests/unit/utils/test_visualizer.py @@ -50,9 +50,9 @@ def test_model_visualizer_mode( fast_dev_run=True, devices=1, ) - datamodule = MVTecAD(root=dataset_path / "mvtec_ad", category="dummy") + datamodule = MVTecAD(root=dataset_path / "mvtecad", category="dummy") engine.test(model=model, datamodule=datamodule, ckpt_path=str(_ckpt_path)) - dataset = PredictDataset(path=dataset_path / "mvtec_ad" / "dummy" / "test") + dataset = PredictDataset(path=dataset_path / "mvtecad" / "dummy" / "test") datamodule = DataLoader(dataset, collate_fn=ImageBatch.collate) engine.predict(model=model, dataloaders=datamodule, ckpt_path=str(_ckpt_path)) diff --git a/tools/inference/README.md b/tools/inference/README.md index f8b535d134..7a534bb213 100644 --- a/tools/inference/README.md +++ b/tools/inference/README.md @@ -13,7 +13,7 @@ Example: ```bash python tools/inference/lightning_inference.py \ --model anomalib.models.Padim \ - --ckpt_path results/padim/mvtec_ad/bottle/weights/lightning/model.ckpt \ + --ckpt_path results/padim/mvtecad/bottle/weights/lightning/model.ckpt \ --data.path datasets/MVTec/bottle/test/broken_large \ --output ./outputs ``` @@ -23,7 +23,7 @@ You can also use a config file with the entrypoint Here is a simple YAML file for Padim Model. ```yaml -ckpt_path: results/padim/mvtec_ad/bottle/weights/lightning/model.ckpt +ckpt_path: results/padim/mvtecad/bottle/weights/lightning/model.ckpt data: path: datasets/MVTec/bottle/test/broken_large transform: null diff --git a/tools/inference/gradio_inference.py b/tools/inference/gradio_inference.py index 344bfe3445..82cb739924 100644 --- a/tools/inference/gradio_inference.py +++ b/tools/inference/gradio_inference.py @@ -22,7 +22,7 @@ def get_parser() -> ArgumentParser: Example: Example for Torch Inference. >>> python tools/inference/gradio_inference.py \ - ... --weights ./results/padim/mvtec_ad/bottle/weights/torch/model.pt + ... 
--weights ./results/padim/mvtecad/bottle/weights/torch/model.pt Returns: ArgumentParser: Argument parser for gradio inference. From 8db23a1b6e0832641ebcf28946a93dbbfc56aace Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 11 Feb 2025 15:59:30 +0000 Subject: [PATCH 05/19] Path fixes Signed-off-by: Samet Akcay --- README.md | 2 +- docs/source/markdown/guides/how_to/data/datamodules.md | 2 +- docs/source/markdown/guides/how_to/data/datasets.md | 2 +- .../guides/how_to/visualization/visualize_image.md | 4 ++-- .../data/image/folder/classification/cli/default.yaml | 2 +- .../snippets/data/image/folder/segmentation/normal.txt | 2 +- .../snippets/data/transforms/datamodule_custom_cli.yaml | 2 +- examples/api/02_data/mvtec.py | 6 +++--- examples/configs/data/folder.yaml | 2 +- examples/configs/data/mvtec.yaml | 2 +- .../000_getting_started/001_getting_started.ipynb | 4 ++-- src/anomalib/data/__init__.py | 3 +-- src/anomalib/data/datasets/image/mvtecad.py | 6 +++--- src/anomalib/data/utils/path.py | 4 ++-- src/anomalib/engine/engine.py | 2 +- tests/conftest.py | 2 +- tests/helpers/data.py | 8 ++++---- tests/integration/cli/test_cli.py | 8 ++++---- tools/inference/README.md | 4 ++-- 19 files changed, 33 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index d79a279a75..4d3d8a157d 100644 --- a/README.md +++ b/README.md @@ -247,7 +247,7 @@ anomalib benchmark --config tools/benchmarking/benchmark_params.yaml > πŸ’‘ **Tip:** Check individual model performance in their respective README files: > -> - [Patchcore Results](src/anomalib/models/image/patchcore/README.md#MVTecAD-ad-dataset) +> - [Patchcore Results](src/anomalib/models/image/patchcore/README.md#mvtec-ad-dataset) > - [Other Models](src/anomalib/models/) # ✍️ Reference diff --git a/docs/source/markdown/guides/how_to/data/datamodules.md b/docs/source/markdown/guides/how_to/data/datamodules.md index 343aacca77..e50a032ac1 100644 --- a/docs/source/markdown/guides/how_to/data/datamodules.md +++ 
b/docs/source/markdown/guides/how_to/data/datamodules.md @@ -106,7 +106,7 @@ The data flow is: from anomalib.data import MVTecAD datamodule = MVTecAD( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category="bottle", train_batch_size=32, eval_batch_size=32, diff --git a/docs/source/markdown/guides/how_to/data/datasets.md b/docs/source/markdown/guides/how_to/data/datasets.md index b2a4c3274d..d2f62c220f 100644 --- a/docs/source/markdown/guides/how_to/data/datasets.md +++ b/docs/source/markdown/guides/how_to/data/datasets.md @@ -50,7 +50,7 @@ from anomalib.data.datasets import MVTecADDataset # Create MVTecAD dataset dataset = MVTecADDataset( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category="bottle", split="train" ) diff --git a/docs/source/markdown/guides/how_to/visualization/visualize_image.md b/docs/source/markdown/guides/how_to/visualization/visualize_image.md index d9e0d14828..64b684fdf6 100644 --- a/docs/source/markdown/guides/how_to/visualization/visualize_image.md +++ b/docs/source/markdown/guides/how_to/visualization/visualize_image.md @@ -70,8 +70,8 @@ import torch from torchvision.io import read_image # Create sample data -image_path = "./datasets/MVTec/bottle/test/broken_large/000.png" -mask_path = "./datasets/MVTec/bottle/ground_truth/broken_large/000_mask.png" +image_path = "./datasets/MVTecAD/bottle/test/broken_large/000.png" +mask_path = "./datasets/MVTecAD/bottle/ground_truth/broken_large/000_mask.png" image = read_image(image_path) mask = read_image(mask_path) diff --git a/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml b/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml index 4d8abf93f9..6798d8cb4c 100644 --- a/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml +++ b/docs/source/snippets/config/data/image/folder/classification/cli/default.yaml @@ -1,7 +1,7 @@ class_path: anomalib.data.Folder init_args: name: "MVTecAD" - root: 
"datasets/MVTec/transistor" + root: "datasets/MVTecAD/transistor" normal_dir: "train/good" abnormal_dir: "test/bent_lead" normal_test_dir: "test/good" diff --git a/docs/source/snippets/data/image/folder/segmentation/normal.txt b/docs/source/snippets/data/image/folder/segmentation/normal.txt index 2d01d22674..087bbf635a 100644 --- a/docs/source/snippets/data/image/folder/segmentation/normal.txt +++ b/docs/source/snippets/data/image/folder/segmentation/normal.txt @@ -5,7 +5,7 @@ from anomalib.data.utils import TestSplitMode, ValSplitMode # Create the datamodule datamodule = Folder( name="hazelnut_toy", - root="datasets/MVTec/transistor", + root="datasets/MVTecAD/transistor", normal_dir="train/good", val_split_mode=ValSplitMode.NONE, test_split_mode=TestSplitMode.NONE, diff --git a/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml b/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml index c3cfbdb869..6eb967ae9e 100644 --- a/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml +++ b/docs/source/snippets/data/transforms/datamodule_custom_cli.yaml @@ -1,6 +1,6 @@ class_path: anomalib.data.MVTecAD init_args: - root: ./datasets/MVTec + root: ./datasets/MVTecAD category: bottle image_size: [256, 256] train_batch_size: 32 diff --git a/examples/api/02_data/mvtec.py b/examples/api/02_data/mvtec.py index 39e4a86d13..4907275bb0 100644 --- a/examples/api/02_data/mvtec.py +++ b/examples/api/02_data/mvtec.py @@ -12,14 +12,14 @@ # 1. Basic Usage # Load a specific category with default settings datamodule = MVTecAD( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category="bottle", ) # 2. 
Advanced Configuration # Customize data loading and preprocessing datamodule = MVTecAD( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category="bottle", train_batch_size=32, eval_batch_size=32, @@ -32,7 +32,7 @@ # Train on multiple categories (if supported by the model) for category in ["bottle", "cable", "capsule"]: category_data = MVTecAD( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category=category, ) # Use category_data with your model... diff --git a/examples/configs/data/folder.yaml b/examples/configs/data/folder.yaml index 705d83051f..830d5f8981 100644 --- a/examples/configs/data/folder.yaml +++ b/examples/configs/data/folder.yaml @@ -1,7 +1,7 @@ class_path: anomalib.data.Folder init_args: name: bottle - root: "datasets/MVTec/bottle" + root: "datasets/MVTecAD/bottle" normal_dir: "train/good" abnormal_dir: "test/broken_large" normal_test_dir: "test/good" diff --git a/examples/configs/data/mvtec.yaml b/examples/configs/data/mvtec.yaml index 21f5d23b99..b370fc7e91 100644 --- a/examples/configs/data/mvtec.yaml +++ b/examples/configs/data/mvtec.yaml @@ -1,6 +1,6 @@ class_path: anomalib.data.MVTecAD init_args: - root: ./datasets/MVTec + root: ./datasets/MVTecAD category: bottle train_batch_size: 32 eval_batch_size: 32 diff --git a/examples/notebooks/000_getting_started/001_getting_started.ipynb b/examples/notebooks/000_getting_started/001_getting_started.ipynb index 325934d1d3..5a653af91c 100644 --- a/examples/notebooks/000_getting_started/001_getting_started.ipynb +++ b/examples/notebooks/000_getting_started/001_getting_started.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:b111d020f79198df6f1769c977cef240d50567bb6a01bc0efe5750c5eff14185 -size 18724 +oid sha256:e4debb404dcb5023c6657f707eab60dded5b7a5327ea1d71a5476dc7ca6f919a +size 18728 diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py index 6c26c0a3f3..096a48bb6d 100644 --- a/src/anomalib/data/__init__.py +++ 
b/src/anomalib/data/__init__.py @@ -49,7 +49,7 @@ # Datamodules from .datamodules.base import AnomalibDataModule from .datamodules.depth import DepthDataFormat, Folder3D, MVTec3D -from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTec, MVTecAD, Visa +from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTecAD, Visa from .datamodules.video import Avenue, ShanghaiTech, UCSDped, VideoDataFormat # Datasets @@ -161,7 +161,6 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule "Kolektor", "KolektorDataset", "MVTecAD", - "MVTec", "MVTecADDataset", "Visa", "VisaDataset", diff --git a/src/anomalib/data/datasets/image/mvtecad.py b/src/anomalib/data/datasets/image/mvtecad.py index b3d176f5b6..9c78779172 100644 --- a/src/anomalib/data/datasets/image/mvtecad.py +++ b/src/anomalib/data/datasets/image/mvtecad.py @@ -149,12 +149,12 @@ def make_mvtec_dataset( - label_index: Numeric label (0=normal, 1=abnormal) Example: - >>> root = Path("./datasets/MVTec/bottle") + >>> root = Path("./datasets/MVTecAD/bottle") >>> samples = make_mvtec_dataset(root, split="train") >>> samples.head() path split label image_path mask_path label_index - 0 datasets/MVTec/bottle train good [...]/good/105.png 0 - 1 datasets/MVTec/bottle train good [...]/good/017.png 0 + 0 datasets/MVTecAD/bottle train good [...]/good/105.png 0 + 1 datasets/MVTecAD/bottle train good [...]/good/017.png 0 Raises: RuntimeError: If no valid images are found diff --git a/src/anomalib/data/utils/path.py b/src/anomalib/data/utils/path.py index 80174159ef..f9245c344c 100644 --- a/src/anomalib/data/utils/path.py +++ b/src/anomalib/data/utils/path.py @@ -10,9 +10,9 @@ Example: >>> from anomalib.data.utils.path import validate_path - >>> path = validate_path("./datasets/MVTec/bottle/train/good/000.png") + >>> path = validate_path("./datasets/MVTecAD/bottle/train/good/000.png") >>> print(path) - 
PosixPath('/abs/path/to/anomalib/datasets/MVTec/bottle/train/good/000.png') + PosixPath('/abs/path/to/anomalib/datasets/MVTecAD/bottle/train/good/000.png') >>> from anomalib.data.utils.path import DirType >>> print(DirType.NORMAL) diff --git a/src/anomalib/engine/engine.py b/src/anomalib/engine/engine.py index b6e03bb038..70d8fcb629 100644 --- a/src/anomalib/engine/engine.py +++ b/src/anomalib/engine/engine.py @@ -608,7 +608,7 @@ def predict( ```python anomalib predict --model anomalib.models.Padim anomalib predict --model Padim \ - --data datasets/MVTec/bottle/test/broken_large + --data datasets/MVTecAD/bottle/test/broken_large ``` 2. Of course, you can override the various values with commands. ```python diff --git a/tests/conftest.py b/tests/conftest.py index bdb35f3dbb..0b59140010 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTecAD(root=dataset_path / "mvtecad", category="dummy") + dataset = MVTecAD(root=dataset_path / "MVTecAD", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path diff --git a/tests/helpers/data.py b/tests/helpers/data.py index dca613a989..444c621bd9 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -277,15 +277,15 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): Examples: To create an MVTecAD dataset with 10 training images and 10 testing images per category, use the following code. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtecad", num_train=10, num_test=10) + >>> dataset_generator = DummyImageDatasetGenerator(data_format="MVTecAD", num_train=10, num_test=10) >>> dataset_generator.generate_dataset() In order to provide a specific directory to save the dataset, use the ``root`` argument. 
- >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtecad", root="./datasets/dummy") + >>> dataset_generator = DummyImageDatasetGenerator(data_format="MVTecAD", root="./datasets/dummy") >>> dataset_generator.generate_dataset() It is also possible to use the generator as a context manager. - >>> with DummyImageDatasetGenerator(data_format="mvtecad", num_train=10, num_test=10) as dataset_path: + >>> with DummyImageDatasetGenerator(data_format="MVTecAD", num_train=10, num_test=10) as dataset_path: >>> some_function() To get the list of available datasets, use the ``DataFormat`` enum. @@ -298,7 +298,7 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): def __init__( self, - data_format: DataFormat | str = "mvtecad", + data_format: DataFormat | str = "MVTecAD", root: Path | str | None = None, normal_category: str = "good", abnormal_category: str = "bad", diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index 1bf765df8d..e882e49ef2 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -87,13 +87,13 @@ def test_validate(self, dataset_path: Path, project_path: Path) -> None: def test_predict_with_dataloader(self, dataset_path: Path, project_path: Path) -> None: """Test the predict method of the CLI. - This test uses the MVTecAD dataloader for predict test. + This test uses the MVTec AD dataloader for predict test. Args: dataset_path (Path): Root of the synthetic/original dataset. project_path (Path): Path to temporary project folder. """ - # Test with MVTecAD Dataset + # Test with MVTec AD Dataset AnomalibCLI( args=[ "predict", @@ -188,10 +188,10 @@ def _get_common_cli_args(dataset_path: Path | None, project_path: Path) -> list[ project_path (Path): Path to the project folder. model_name (str): Name of the model. Defaults to None. 
""" - # We need to set the predict dataloader as MVTecAD and UCSDped do not + # We need to set the predict dataloader as MVTec AD and UCSDped do not # have predict_dataloader attribute defined. if dataset_path: - data_root = f"{dataset_path}/mvtec" + data_root = f"{dataset_path}/MVTecAD" dataclass = "MVTecAD" data_args = [ "--data", diff --git a/tools/inference/README.md b/tools/inference/README.md index 7a534bb213..a0c125c5fa 100644 --- a/tools/inference/README.md +++ b/tools/inference/README.md @@ -14,7 +14,7 @@ Example: python tools/inference/lightning_inference.py \ --model anomalib.models.Padim \ --ckpt_path results/padim/mvtecad/bottle/weights/lightning/model.ckpt \ - --data.path datasets/MVTec/bottle/test/broken_large \ + --data.path datasets/MVTecAD/bottle/test/broken_large \ --output ./outputs ``` @@ -25,7 +25,7 @@ Here is a simple YAML file for Padim Model. ```yaml ckpt_path: results/padim/mvtecad/bottle/weights/lightning/model.ckpt data: - path: datasets/MVTec/bottle/test/broken_large + path: datasets/MVTecAD/bottle/test/broken_large transform: null image_size: - 256 From da1c9d176157ddf6cd08d658110be249178a72da Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 11 Feb 2025 16:08:09 +0000 Subject: [PATCH 06/19] Path fixes Signed-off-by: Samet Akcay --- docs/source/markdown/get_started/migration.md | 2 +- .../snippets/data/transforms/datamodule_train_eval_cli.yaml | 2 +- docs/source/snippets/data/transforms/inference_cli.yaml | 2 +- examples/api/01_getting_started/basic_training.py | 2 +- examples/api/03_models/padim.py | 2 +- examples/api/03_models/patchcore.py | 2 +- examples/api/04_advanced/loggers.py | 2 +- examples/api/05_pipelines/complete_pipeline.py | 2 +- examples/notebooks/100_datamodules/102_mvtec.ipynb | 4 ++-- examples/notebooks/100_datamodules/104_tiling.ipynb | 4 ++-- examples/notebooks/600_loggers/601_mlflow_logging.ipynb | 4 ++-- examples/notebooks/700_metrics/701a_aupimo.ipynb | 4 ++-- 
examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb | 4 ++-- .../notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb | 4 ++-- src/anomalib/data/__init__.py | 2 +- src/anomalib/data/datamodules/__init__.py | 3 ++- src/anomalib/data/datamodules/image/__init__.py | 3 ++- src/anomalib/pipelines/benchmark/generator.py | 6 +++--- tests/unit/engine/test_engine.py | 2 +- 19 files changed, 29 insertions(+), 27 deletions(-) diff --git a/docs/source/markdown/get_started/migration.md b/docs/source/markdown/get_started/migration.md index fccd0d4052..c9078c481a 100644 --- a/docs/source/markdown/get_started/migration.md +++ b/docs/source/markdown/get_started/migration.md @@ -72,7 +72,7 @@ between the old and new configuration files highlighted in a markdown diff forma + class_path: anomalib.data.MVTecAD + init_args: - path: ./datasets/MVTecAD -+ root: ./datasets/MVTec ++ root: ./datasets/MVTecAD category: bottle image_size: 256 center_crop: null diff --git a/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml b/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml index 60edfe29ac..c60c66295d 100644 --- a/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml +++ b/docs/source/snippets/data/transforms/datamodule_train_eval_cli.yaml @@ -1,6 +1,6 @@ class_path: anomalib.data.MVTecAD init_args: - root: ./datasets/MVTec + root: ./datasets/MVTecAD category: bottle train_batch_size: 32 eval_batch_size: 32 diff --git a/docs/source/snippets/data/transforms/inference_cli.yaml b/docs/source/snippets/data/transforms/inference_cli.yaml index e9d11062a4..b2b7e49d8f 100644 --- a/docs/source/snippets/data/transforms/inference_cli.yaml +++ b/docs/source/snippets/data/transforms/inference_cli.yaml @@ -1,6 +1,6 @@ class_path: anomalib.data.MVTecAD init_args: - root: ./datasets/MVTec + root: ./datasets/MVTecAD category: bottle image_size: [256, 256] train_batch_size: 32 diff --git a/examples/api/01_getting_started/basic_training.py 
b/examples/api/01_getting_started/basic_training.py index fddc0b998d..5dcff94a3f 100644 --- a/examples/api/01_getting_started/basic_training.py +++ b/examples/api/01_getting_started/basic_training.py @@ -15,7 +15,7 @@ # 2. Create a dataset # MVTecAD is a popular dataset for anomaly detection datamodule = MVTecAD( - root="./datasets/MVTec", # Path to download/store the dataset + root="./datasets/MVTecAD", # Path to download/store the dataset category="bottle", # MVTec category to use train_batch_size=32, # Number of images per training batch eval_batch_size=32, # Number of images per validation/test batch diff --git a/examples/api/03_models/padim.py b/examples/api/03_models/padim.py index 3fdf8e24fd..d564a36849 100644 --- a/examples/api/03_models/padim.py +++ b/examples/api/03_models/padim.py @@ -27,7 +27,7 @@ # 3. Training Pipeline # Set up the complete training pipeline datamodule = MVTecAD( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category="bottle", train_batch_size=32, eval_batch_size=32, # Important for feature extraction diff --git a/examples/api/03_models/patchcore.py b/examples/api/03_models/patchcore.py index 2e23d3bd73..11b97b7ee2 100644 --- a/examples/api/03_models/patchcore.py +++ b/examples/api/03_models/patchcore.py @@ -27,7 +27,7 @@ # 3. Training Pipeline # Set up the complete training pipeline datamodule = MVTecAD( - root="./datasets/MVTec", + root="./datasets/MVTecAD", category="bottle", train_batch_size=32, eval_batch_size=32, # Important for feature extraction diff --git a/examples/api/04_advanced/loggers.py b/examples/api/04_advanced/loggers.py index 26bc63498b..fb2d84ffd1 100644 --- a/examples/api/04_advanced/loggers.py +++ b/examples/api/04_advanced/loggers.py @@ -55,7 +55,7 @@ # 5. 
Complete Training Example with Logging model = Patchcore() datamodule = MVTecAD( - root=Path("./datasets/MVTec"), + root=Path("./datasets/MVTecAD"), category="bottle", ) diff --git a/examples/api/05_pipelines/complete_pipeline.py b/examples/api/05_pipelines/complete_pipeline.py index 93aa74a1b3..ee4f1bf191 100644 --- a/examples/api/05_pipelines/complete_pipeline.py +++ b/examples/api/05_pipelines/complete_pipeline.py @@ -23,7 +23,7 @@ # Initialize components model = Patchcore() datamodule = MVTecAD( - root=Path("./datasets/MVTec"), + root=Path("./datasets/MVTecAD"), category="bottle", train_batch_size=32, ) diff --git a/examples/notebooks/100_datamodules/102_mvtec.ipynb b/examples/notebooks/100_datamodules/102_mvtec.ipynb index 7f5f229a90..f4b4e33945 100644 --- a/examples/notebooks/100_datamodules/102_mvtec.ipynb +++ b/examples/notebooks/100_datamodules/102_mvtec.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:425b1c0edd05d919b5040d3e7801b99643652c3f026c6c56a47bab65f5f2e7a9 -size 7869 +oid sha256:619bb89f84ddbead28bab398d8222197ee8e791ee6de5b58a68ac5eb498d53e2 +size 7871 diff --git a/examples/notebooks/100_datamodules/104_tiling.ipynb b/examples/notebooks/100_datamodules/104_tiling.ipynb index 02c7591063..c51023ffd2 100644 --- a/examples/notebooks/100_datamodules/104_tiling.ipynb +++ b/examples/notebooks/100_datamodules/104_tiling.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:451cdbb527f910d254712f59b1456b43ac52e0d20be57c5a59784954df3f2b0b -size 9817 +oid sha256:0cd49fed8aa268793bbcc04cd54261b587832c53c03d382c107c95097527cda1 +size 9819 diff --git a/examples/notebooks/600_loggers/601_mlflow_logging.ipynb b/examples/notebooks/600_loggers/601_mlflow_logging.ipynb index f0dadadefc..8bd0688890 100644 --- a/examples/notebooks/600_loggers/601_mlflow_logging.ipynb +++ b/examples/notebooks/600_loggers/601_mlflow_logging.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid 
sha256:074a352435337f915dcae80bd1d203bd3f9c90315fec5022a08b688a1ba93886 -size 8385 +oid sha256:a29336cb0388230b5f12df071eb40372a3b8957a0e6ee907fe13336ed4482678 +size 8387 diff --git a/examples/notebooks/700_metrics/701a_aupimo.ipynb b/examples/notebooks/700_metrics/701a_aupimo.ipynb index e8ebfda2b4..122fd9f77d 100644 --- a/examples/notebooks/700_metrics/701a_aupimo.ipynb +++ b/examples/notebooks/700_metrics/701a_aupimo.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bd5df1910fdb0b54a4200d6aba4be95ed303659d52c708c00c871241f5968be4 -size 11023 +oid sha256:915f32f7013578f81d4641fbffd94fcc35fca4248b557b12f6de5a90f9ea54ac +size 11025 diff --git a/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb b/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb index c65f6a7e4c..57d29b19c2 100644 --- a/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb +++ b/examples/notebooks/700_metrics/701b_aupimo_advanced_i.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:8063f7f19bb53d799edc49e5de4495b0369ebec658613ce71c7dd163aa9a41e4 -size 112179 +oid sha256:468e85580f8accf17208f54de3979465e11346513d35626b430f1c806ec3ca0b +size 112181 diff --git a/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb b/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb index e02f4551c1..1d60a805ee 100644 --- a/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb +++ b/examples/notebooks/700_metrics/701c_aupimo_advanced_ii.ipynb @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0bcbdc7140626d330363b82abf1af0bf4e3fffe37a1746036ff842b92158a63c -size 230525 +oid sha256:858307bd1d38b6a713b8e7090222646a4c16452d9d87f1e707055f5aa3a78e4b +size 230527 diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py index 096a48bb6d..30f7d4d821 100644 --- a/src/anomalib/data/__init__.py +++ b/src/anomalib/data/__init__.py @@ -11,7 +11,7 @@ Example: >>> from anomalib.data import 
MVTecAD >>> datamodule = MVTecAD( - ... root="./datasets/MVTec", + ... root="./datasets/MVTecAD", ... category="bottle", ... image_size=(256, 256) ... ) diff --git a/src/anomalib/data/datamodules/__init__.py b/src/anomalib/data/datamodules/__init__.py index 4072428384..54c609cecb 100644 --- a/src/anomalib/data/datamodules/__init__.py +++ b/src/anomalib/data/datamodules/__init__.py @@ -14,9 +14,10 @@ "Datumaro", "Folder", "Kolektor", - "MVTec", + "MVTecAD", "Visa", "Avenue", "ShanghaiTech", "UCSDped", + "MVTec", ] diff --git a/src/anomalib/data/datamodules/image/__init__.py b/src/anomalib/data/datamodules/image/__init__.py index f817d4c948..1dda736220 100644 --- a/src/anomalib/data/datamodules/image/__init__.py +++ b/src/anomalib/data/datamodules/image/__init__.py @@ -64,6 +64,7 @@ class ImageDataFormat(str, Enum): "Folder", "Kolektor", "MVTecAD", - "MVTec", # Include both for backward compatibility + "MVTecAD", # Include both for backward compatibility "Visa", + "MVTec", ] diff --git a/src/anomalib/pipelines/benchmark/generator.py b/src/anomalib/pipelines/benchmark/generator.py index 2da6f93dfd..c8e1e73829 100644 --- a/src/anomalib/pipelines/benchmark/generator.py +++ b/src/anomalib/pipelines/benchmark/generator.py @@ -10,7 +10,7 @@ >>> args = { ... "seed": 42, ... "model": {"class_path": "Padim"}, - ... "data": {"class_path": "MVTec", "init_args": {"category": "bottle"}} + ... "data": {"class_path": "MVTecAD", "init_args": {"category": "bottle"}} ... } >>> jobs = list(generator.generate_jobs(args, None)) @@ -51,7 +51,7 @@ class BenchmarkJobGenerator(JobGenerator): >>> args = { ... "seed": 42, ... "model": {"class_path": "Padim"}, - ... "data": {"class_path": "MVTec", "init_args": {"category": "bottle"}} + ... "data": {"class_path": "MVTecAD", "init_args": {"category": "bottle"}} ... } >>> jobs = list(generator.generate_jobs(args, None)) """ @@ -91,7 +91,7 @@ def generate_jobs( >>> args = { ... "seed": 42, ... "model": {"class_path": "Padim"}, - ... 
"data": {"class_path": "MVTec"} + ... "data": {"class_path": "MVTecAD"} ... } >>> jobs = list(generator.generate_jobs(args, None)) """ diff --git a/tests/unit/engine/test_engine.py b/tests/unit/engine/test_engine.py index 98aa830ec3..e38c01e634 100644 --- a/tests/unit/engine/test_engine.py +++ b/tests/unit/engine/test_engine.py @@ -78,7 +78,7 @@ def fxt_full_config_path(tmp_path: Path) -> Path: data: class_path: anomalib.data.MVTec init_args: - root: datasets/MVTec + root: datasets/MVTecAD category: bottle train_batch_size: 32 eval_batch_size: 32 From ec69b592cf343483c92fe229664fa3df7d5e964f Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 12 Feb 2025 12:42:06 +0000 Subject: [PATCH 07/19] Add MVTecAD2 dataset and datamodule Signed-off-by: Samet Akcay --- examples/api/02_data/mvtecad2.py | 147 +++++++++ examples/configs/data/mvtecad2.yaml | 11 + src/anomalib/data/__init__.py | 3 +- src/anomalib/data/datamodules/base/image.py | 3 + .../data/datamodules/image/__init__.py | 6 +- .../data/datamodules/image/mvtecad2.py | 217 +++++++++++++ src/anomalib/data/datasets/base/image.py | 14 +- src/anomalib/data/datasets/image/__init__.py | 5 +- src/anomalib/data/datasets/image/mvtecad2.py | 291 ++++++++++++++++++ src/anomalib/data/utils/label.py | 6 + src/anomalib/data/utils/split.py | 2 + tests/helpers/data.py | 68 +++- .../data/datamodule/image/test_mvtecad2.py | 63 ++++ 13 files changed, 823 insertions(+), 13 deletions(-) create mode 100644 examples/api/02_data/mvtecad2.py create mode 100644 examples/configs/data/mvtecad2.yaml create mode 100644 src/anomalib/data/datamodules/image/mvtecad2.py create mode 100644 src/anomalib/data/datasets/image/mvtecad2.py create mode 100644 tests/unit/data/datamodule/image/test_mvtecad2.py diff --git a/examples/api/02_data/mvtecad2.py b/examples/api/02_data/mvtecad2.py new file mode 100644 index 0000000000..b450838f55 --- /dev/null +++ b/examples/api/02_data/mvtecad2.py @@ -0,0 +1,147 @@ +"""Example showing how to use the MVTec AD 2 
dataset with Anomalib. + +This example demonstrates how to: +1. Load and visualize the MVTec AD 2 dataset +2. Create a datamodule and use it for training +3. Access different test sets (public, private, mixed) +4. Work with custom transforms and visualization +""" + +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import matplotlib.pyplot as plt +import torch +from torch.utils.data import DataLoader +from torchvision.transforms.v2 import Compose, Resize, ToDtype, ToImage + +from anomalib.data import MVTecAD2 +from anomalib.data.datasets.base.image import ImageItem +from anomalib.data.datasets.image.mvtecad2 import MVTecAD2Dataset, TestType +from anomalib.data.utils import Split + +# 1. Basic Usage +print("1. Basic Usage") +datamodule = MVTecAD2( + root="./datasets/MVTec_AD_2", + category="sheet_metal", + train_batch_size=32, + eval_batch_size=32, + num_workers=8, +) +datamodule.setup() # This will prepare the dataset + +# Print some information about the splits +print(f"Number of training samples: {len(datamodule.train_data)}") +print(f"Number of validation samples: {len(datamodule.val_data)}") +print(f"Number of test samples (public): {len(datamodule.test_public_data)}") +print(f"Number of test samples (private): {len(datamodule.test_private_data)}") +print(f"Number of test samples (private mixed): {len(datamodule.test_private_mixed_data)}") + +# 2. Custom Transforms +print("\n2. Custom Transforms") +transform = Compose([ + ToImage(), + Resize((256, 256)), + ToDtype(torch.float32, scale=True), +]) + +# Create dataset with custom transform +datamodule = MVTecAD2( + root="./datasets/MVTec_AD_2", + category="sheet_metal", + train_augmentations=transform, + val_augmentations=transform, + test_augmentations=transform, +) +datamodule.setup() + +# 3. Different Test Sets +print("\n3. 
Accessing Different Test Sets") + +# Get loaders for each test set +public_loader = datamodule.test_dataloader(test_type=TestType.PUBLIC) +private_loader = datamodule.test_dataloader(test_type=TestType.PRIVATE) +mixed_loader = datamodule.test_dataloader(test_type=TestType.PRIVATE_MIXED) + +# Get sample batches +public_batch = next(iter(public_loader)) +private_batch = next(iter(private_loader)) +mixed_batch = next(iter(mixed_loader)) + +print("Public test batch shape:", public_batch.image.shape) +print("Private test batch shape:", private_batch.image.shape) +print("Private mixed test batch shape:", mixed_batch.image.shape) + +# 4. Advanced Usage - Direct Dataset Access +print("\n4. Advanced Usage") + +# Create datasets for each split +train_dataset = MVTecAD2Dataset( + root="./datasets/MVTec_AD_2", + category="sheet_metal", + split=Split.TRAIN, + augmentations=transform, +) + +test_dataset = MVTecAD2Dataset( + root="./datasets/MVTec_AD_2", + category="sheet_metal", + split=Split.TEST, + test_type=TestType.PUBLIC, # Use public test set + augmentations=transform, +) + +# Create dataloaders +train_loader = DataLoader(train_dataset, batch_size=4, shuffle=True, collate_fn=train_dataset.collate_fn) +test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, collate_fn=test_dataset.collate_fn) + +# Get some sample images +train_samples = next(iter(train_loader)) +test_samples = next(iter(test_loader)) + +print("Train Dataset:") +print(f"- Number of samples: {len(train_dataset)}") +print(f"- Image shape: {train_samples.image.shape}") +print(f"- Labels: {train_samples.gt_label}") + +print("\nTest Dataset:") +print(f"- Number of samples: {len(test_dataset)}") +print(f"- Image shape: {test_samples.image.shape}") +print(f"- Labels: {test_samples.gt_label}") +if hasattr(test_samples, "gt_mask") and test_samples.gt_mask is not None: + print(f"- Mask shape: {test_samples.gt_mask.shape}") + + +# 5. 
Visualize some samples +def show_samples(samples: ImageItem, title: str) -> None: + """Helper function to display samples.""" + if samples.image is None or samples.gt_label is None: + msg = "Samples must have image and label data" + raise ValueError(msg) + + fig, axes = plt.subplots(1, 4, figsize=(15, 4)) + fig.suptitle(title) + + for i in range(4): + img = samples.image[i].permute(1, 2, 0).numpy() + axes[i].imshow(img) + axes[i].axis("off") + if hasattr(samples, "gt_mask") and samples.gt_mask is not None: + mask = samples.gt_mask[i].squeeze().numpy() + axes[i].imshow(mask, alpha=0.3, cmap="Reds") + label = "Normal" if samples.gt_label[i] == 0 else "Anomaly" + axes[i].set_title(label) + + plt.tight_layout() + plt.show() + + +# Show training samples (normal only) +show_samples(train_samples, "Training Samples (Normal)") + +# Show test samples (mix of normal and anomalous) +show_samples(test_samples, "Test Samples (Normal + Anomalous)") + +if __name__ == "__main__": + print("\nMVTec AD 2 Dataset example completed successfully!") diff --git a/examples/configs/data/mvtecad2.yaml b/examples/configs/data/mvtecad2.yaml new file mode 100644 index 0000000000..41ce5b9379 --- /dev/null +++ b/examples/configs/data/mvtecad2.yaml @@ -0,0 +1,11 @@ +class_path: anomalib.data.MVTecAD2 +init_args: + root: "./datasets/MVTec_AD_2" + category: "sheet_metal" + train_batch_size: 32 + eval_batch_size: 32 + num_workers: 8 + test_split_mode: from_dir + val_split_mode: from_dir + test_type: "public" + seed: null diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py index 30f7d4d821..dff69b3f09 100644 --- a/src/anomalib/data/__init__.py +++ b/src/anomalib/data/__init__.py @@ -49,7 +49,7 @@ # Datamodules from .datamodules.base import AnomalibDataModule from .datamodules.depth import DepthDataFormat, Folder3D, MVTec3D -from .datamodules.image import BTech, Datumaro, Folder, ImageDataFormat, Kolektor, MVTecAD, Visa +from .datamodules.image import BTech, Datumaro, Folder, 
ImageDataFormat, Kolektor, MVTecAD, MVTecAD2, Visa from .datamodules.video import Avenue, ShanghaiTech, UCSDped, VideoDataFormat # Datasets @@ -162,6 +162,7 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule "KolektorDataset", "MVTecAD", "MVTecADDataset", + "MVTecAD2", "Visa", "VisaDataset", # Video diff --git a/src/anomalib/data/datamodules/base/image.py b/src/anomalib/data/datamodules/base/image.py index 887349b02c..81b6e2140e 100644 --- a/src/anomalib/data/datamodules/base/image.py +++ b/src/anomalib/data/datamodules/base/image.py @@ -317,6 +317,9 @@ def _create_val_split(self) -> None: This handles sampling from train/test sets and optionally creating synthetic anomalies. """ + if self.val_split_mode == ValSplitMode.FROM_DIR: + # If the validation split mode is FROM_DIR, we don't need to create a validation set + return if self.val_split_mode == ValSplitMode.FROM_TRAIN: # randomly sample from train set self.train_data, self.val_data = random_split( diff --git a/src/anomalib/data/datamodules/image/__init__.py b/src/anomalib/data/datamodules/image/__init__.py index 1dda736220..581c3d7356 100644 --- a/src/anomalib/data/datamodules/image/__init__.py +++ b/src/anomalib/data/datamodules/image/__init__.py @@ -8,6 +8,7 @@ - ``Folder``: Custom folder structure with normal/abnormal images - ``Kolektor``: Kolektor Surface-Defect Dataset - ``MVTecAD``: MVTec Anomaly Detection Dataset +- ``MVTecAD2``: MVTec Anomaly Detection Dataset 2 - ``Visa``: Visual Inspection for Steel Anomaly Dataset Example: @@ -30,6 +31,7 @@ from .folder import Folder from .kolektor import Kolektor from .mvtecad import MVTec, MVTecAD +from .mvtecad2 import MVTecAD2 from .visa import Visa @@ -44,6 +46,7 @@ class ImageDataFormat(str, Enum): - ``FOLDER_3D``: Custom folder structure for 3D images - ``KOLEKTOR``: Kolektor Surface-Defect Dataset - ``MVTEC_AD``: MVTec AD Dataset + - ``MVTEC_AD_2``: MVTec AD 2 Dataset - ``MVTEC_3D``: MVTec 3D AD Dataset - ``VISA``: Visual 
Inspection for Steel Anomaly Dataset """ @@ -54,6 +57,7 @@ class ImageDataFormat(str, Enum): FOLDER_3D = "folder_3d" KOLEKTOR = "kolektor" MVTEC_AD = "mvtecad" + MVTEC_AD_2 = "mvtecad2" MVTEC_3D = "mvtec_3d" VISA = "visa" @@ -64,7 +68,7 @@ class ImageDataFormat(str, Enum): "Folder", "Kolektor", "MVTecAD", - "MVTecAD", # Include both for backward compatibility + "MVTecAD2", "Visa", "MVTec", ] diff --git a/src/anomalib/data/datamodules/image/mvtecad2.py b/src/anomalib/data/datamodules/image/mvtecad2.py new file mode 100644 index 0000000000..729fccbeb9 --- /dev/null +++ b/src/anomalib/data/datamodules/image/mvtecad2.py @@ -0,0 +1,217 @@ +"""MVTec AD 2 Lightning Data Module. + +This module implements a PyTorch Lightning DataModule for the MVTec AD 2 dataset. +The module handles loading and preprocessing of the dataset for training and +evaluation; the dataset itself must be downloaded manually. + +The dataset provides three different test sets: + - Public test set (test_public/): Contains both normal and anomalous samples with ground truth masks + - Private test set (test_private/): Contains unseen test samples without ground truth + - Private mixed test set (test_private_mixed/): Contains unseen test samples + with mixed anomalies without ground truth + +The public test set is used for standard evaluation, while the private test sets +are used for real-world evaluation scenarios where ground truth is not available. + +License: + MVTec AD 2 dataset is released under the Creative Commons + Attribution-NonCommercial-ShareAlike 4.0 International License + (CC BY-NC-SA 4.0) https://creativecommons.org/licenses/by-nc-sa/4.0/ + +Reference: + Lars Heckler-Kram, Jan-Hendrik Neudeck, Ulla Scheler, Rebecca König, Carsten Steger: + The MVTec AD 2 Dataset: Advanced Scenarios for Unsupervised Anomaly Detection. + arXiv preprint, 2024 (to appear).
+""" + +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +from lightning.pytorch.utilities.types import EVAL_DATALOADERS +from torch.utils.data import DataLoader +from torchvision.transforms.v2 import Transform + +from anomalib.data.datamodules.base.image import AnomalibDataModule +from anomalib.data.datasets.image import MVTecAD2Dataset +from anomalib.data.datasets.image.mvtecad2 import TestType +from anomalib.data.utils import Split + + +class MVTecAD2(AnomalibDataModule): + """MVTec AD 2 Lightning Data Module. + + Args: + root (str | Path): Path to the dataset root directory. + Defaults to ``"./datasets/MVTec_AD_2"``. + category (str): Name of the MVTec AD 2 category to load. + Defaults to ``"sheet_metal"``. + train_batch_size (int, optional): Training batch size. + Defaults to ``32``. + eval_batch_size (int, optional): Validation and test batch size. + Defaults to ``32``. + num_workers (int, optional): Number of workers for data loading. + Defaults to ``8``. + train_augmentations (Transform | None): Augmentations to apply to the training images + Defaults to ``None``. + val_augmentations (Transform | None): Augmentations to apply to the validation images. + Defaults to ``None``. + test_augmentations (Transform | None): Augmentations to apply to the test images. + Defaults to ``None``. + augmentations (Transform | None): General augmentations to apply if stage-specific + augmentations are not provided. + test_type (str | TestType): Type of test set to use - ``"public"``, ``"private"``, + or ``"private_mixed"``. This determines which test set is returned by + test_dataloader(). Defaults to ``TestType.PUBLIC``. + seed (int | None, optional): Random seed for reproducibility. + Defaults to ``None``. + + Example: + >>> from anomalib.data import MVTecAD2 + >>> datamodule = MVTecAD2( + ... root="./datasets/MVTec_AD_2", + ... category="sheet_metal", + ... train_batch_size=32, + ... eval_batch_size=32, + ... 
num_workers=8, + ... ) + + To use private test set: + >>> datamodule = MVTecAD2( + ... root="./datasets/MVTec_AD_2", + ... category="sheet_metal", + ... test_type="private", + ... ) + + Access different test sets: + >>> datamodule.setup() + >>> public_loader = datamodule.test_dataloader() # returns loader based on test_type + >>> private_loader = datamodule.test_dataloader(test_type="private") + >>> mixed_loader = datamodule.test_dataloader(test_type="private_mixed") + """ + + def __init__( + self, + root: str | Path = "./datasets/MVTec_AD_2", + category: str = "sheet_metal", + train_batch_size: int = 32, + eval_batch_size: int = 32, + num_workers: int = 8, + train_augmentations: Transform | None = None, + val_augmentations: Transform | None = None, + test_augmentations: Transform | None = None, + augmentations: Transform | None = None, + test_type: str | TestType = TestType.PUBLIC, + seed: int | None = None, + ) -> None: + """Initialize MVTec AD 2 datamodule.""" + super().__init__( + train_batch_size=train_batch_size, + eval_batch_size=eval_batch_size, + num_workers=num_workers, + train_augmentations=train_augmentations, + val_augmentations=val_augmentations, + test_augmentations=test_augmentations, + augmentations=augmentations, + seed=seed, + ) + + self.root = Path(root) + self.category = category + self.test_type = TestType(test_type) if isinstance(test_type, str) else test_type + + def prepare_data(self) -> None: + """Prepare the dataset. + + MVTec AD 2 dataset needs to be downloaded manually from MVTec AD 2 website. + """ + # NOTE: For now, users need to manually download the dataset. + + def _setup(self, _stage: str | None = None) -> None: + """Set up the datasets and perform train/validation/test split. + + Args: + _stage: str | None: Optional argument for compatibility with pytorch + lightning. Defaults to None. 
+ """ + self.train_data = MVTecAD2Dataset( + root=self.root, + category=self.category, + split=Split.TRAIN, + augmentations=self.train_augmentations, + ) + + # MVTec AD 2 has a dedicated validation set + self.val_data = MVTecAD2Dataset( + root=self.root, + category=self.category, + split=Split.VAL, + augmentations=self.val_augmentations, + ) + + # Create datasets for all test types + self.test_public_data = MVTecAD2Dataset( + root=self.root, + category=self.category, + split=Split.TEST, + test_type=TestType.PUBLIC, + augmentations=self.test_augmentations, + ) + + self.test_private_data = MVTecAD2Dataset( + root=self.root, + category=self.category, + split=Split.TEST, + test_type=TestType.PRIVATE, + augmentations=self.test_augmentations, + ) + + self.test_private_mixed_data = MVTecAD2Dataset( + root=self.root, + category=self.category, + split=Split.TEST, + test_type=TestType.PRIVATE_MIXED, + augmentations=self.test_augmentations, + ) + + # Always set test_data to public test set for standard evaluation + self.test_data = self.test_public_data + + def test_dataloader(self, test_type: str | TestType | None = None) -> EVAL_DATALOADERS: + """Get test dataloader for the specified test type. + + Args: + test_type (str | TestType | None, optional): Type of test set to use. + If None, uses the test_type specified in __init__. + Defaults to None. + + Example: + >>> datamodule.setup() + >>> public_loader = datamodule.test_dataloader() # returns loader based on test_type + >>> private_loader = datamodule.test_dataloader(test_type="private") + >>> mixed_loader = datamodule.test_dataloader(test_type="private_mixed") + + Returns: + EVAL_DATALOADERS: Test dataloader for the specified test type. 
+ """ + test_type = test_type or self.test_type + test_type = TestType(test_type) if isinstance(test_type, str) else test_type + + if test_type == TestType.PUBLIC: + dataset = self.test_public_data + elif test_type == TestType.PRIVATE: + dataset = self.test_private_data + elif test_type == TestType.PRIVATE_MIXED: + dataset = self.test_private_mixed_data + else: + msg = f"Invalid test type: {test_type}. Must be one of {TestType.__members__.keys()}." + raise ValueError(msg) + + return DataLoader( + dataset=dataset, + shuffle=False, + batch_size=self.eval_batch_size, + num_workers=self.num_workers, + collate_fn=dataset.collate_fn, + ) diff --git a/src/anomalib/data/datasets/base/image.py b/src/anomalib/data/datasets/base/image.py index e4bcc526b5..4cd659e1c0 100644 --- a/src/anomalib/data/datasets/base/image.py +++ b/src/anomalib/data/datasets/base/image.py @@ -254,9 +254,6 @@ def __getitem__(self, index: int) -> DatasetItem: Returns: DatasetItem: Dataset item containing image and ground truth (if available). - Raises: - ValueError: If task type is unknown. - Example: >>> dataset = AnomalibDataset() >>> item = dataset[0] @@ -273,15 +270,16 @@ def __getitem__(self, index: int) -> DatasetItem: if self.task == TaskType.CLASSIFICATION: item["image"] = self.augmentations(image) if self.augmentations else image elif self.task == TaskType.SEGMENTATION: - # Only Anomalous (1) images have masks in anomaly datasets - # Therefore, create empty mask for Normal (0) images. 
+ # Create empty mask for: + # - Normal samples (label_index = 0) + # - Unknown samples (label_index = -1) + # Only use mask for anomalous samples (label_index = 1) mask = ( Mask(torch.zeros(image.shape[-2:])).to(torch.uint8) - if label_index == LabelName.NORMAL + if label_index in {LabelName.NORMAL, LabelName.UNKNOWN} else read_mask(mask_path, as_tensor=True) ) item["image"], item["gt_mask"] = self.augmentations(image, mask) if self.augmentations else (image, mask) - else: msg = f"Unknown task type: {self.task}" raise ValueError(msg) @@ -289,7 +287,7 @@ def __getitem__(self, index: int) -> DatasetItem: return ImageItem( image=item["image"], gt_mask=item.get("gt_mask"), - gt_label=int(label_index), + gt_label=torch.tensor(label_index), # Convert to tensor to match type hints image_path=image_path, mask_path=mask_path, ) diff --git a/src/anomalib/data/datasets/image/__init__.py b/src/anomalib/data/datasets/image/__init__.py index 8211fdf25b..2a6213bbd7 100644 --- a/src/anomalib/data/datasets/image/__init__.py +++ b/src/anomalib/data/datasets/image/__init__.py @@ -26,7 +26,8 @@ from .datumaro import DatumaroDataset from .folder import FolderDataset from .kolektor import KolektorDataset -from .mvtecad import MVTecADDataset, MVTecDataset +from .mvtecad import MVTecADDataset +from .mvtecad2 import MVTecAD2Dataset from .visa import VisaDataset __all__ = [ @@ -34,7 +35,7 @@ "DatumaroDataset", "FolderDataset", "KolektorDataset", - "MVTecDataset", "MVTecADDataset", + "MVTecAD2Dataset", "VisaDataset", ] diff --git a/src/anomalib/data/datasets/image/mvtecad2.py b/src/anomalib/data/datasets/image/mvtecad2.py new file mode 100644 index 0000000000..03f994caa3 --- /dev/null +++ b/src/anomalib/data/datasets/image/mvtecad2.py @@ -0,0 +1,291 @@ +"""MVTec AD 2 Dataset. + +This module provides PyTorch Dataset implementation for the MVTec AD 2 dataset. +The dataset contains 8 categories of industrial objects with both normal and +anomalous samples. 
Each category includes RGB images and pixel-level ground truth +masks for anomaly segmentation. + +The dataset provides three different test sets: + - Public test set (test_public/): Contains both normal and anomalous samples with ground truth masks + - Private test set (test_private/): Contains unseen test samples without ground truth + - Private mixed test set (test_private_mixed/): Contains unseen test samples + with mixed anomalies without ground truth + +The public test set is used for standard evaluation, while the private test sets +are used for real-world evaluation scenarios where ground truth is not available. + +License: + MVTec AD 2 dataset is released under the Creative Commons + Attribution-NonCommercial-ShareAlike 4.0 International License + (CC BY-NC-SA 4.0) https://creativecommons.org/licenses/by-nc-sa/4.0/ + +Reference: + Lars Heckler-Kram, Jan-Hendrik Neudeck, Ulla Scheler, Rebecca König, Carsten Steger: + The MVTec AD 2 Dataset: Advanced Scenarios for Unsupervised Anomaly Detection. + arXiv preprint, 2024 (to appear). +""" + +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from collections.abc import Sequence +from enum import Enum +from pathlib import Path + +from pandas import DataFrame +from torchvision.transforms.v2 import Transform + +from anomalib.data.datasets.base.image import AnomalibDataset +from anomalib.data.errors import MisMatchError +from anomalib.data.utils import Split, validate_path + + +class TestType(str, Enum): + """Type of test set to use.
+ + The MVTec AD 2 dataset provides three different test sets: + - PUBLIC: Standard test set with ground truth masks for evaluation + - PRIVATE: Unseen test set without ground truth for real-world testing + - PRIVATE_MIXED: Unseen test set with mixed anomalies without ground truth + """ + + PUBLIC = "public" # Test set with ground truth + PRIVATE = "private" # Private test set without ground truth + PRIVATE_MIXED = "private_mixed" # Private test set with mixed anomalies + + +IMG_EXTENSIONS = (".png", ".PNG") +CATEGORIES = ( + "can", + "fabric", + "fruit_jelly", + "rice", + "sheet_metal", + "vial", + "wallplugs", + "walnuts", +) + + +class MVTecAD2Dataset(AnomalibDataset): + """MVTec AD 2 dataset class. + + Args: + root (Path | str): Path to the root of the dataset. + Defaults to ``"./datasets/MVTec_AD_2"``. + category (str): Category name, e.g. ``"sheet_metal"``. + Defaults to ``"sheet_metal"``. + augmentations (Transform, optional): Augmentations that should be applied to the input images. + Defaults to ``None``. + split (str | Split | None): Dataset split - usually ``Split.TRAIN``, ``Split.VAL``, + or ``Split.TEST``. Defaults to ``None``. + test_type (str | TestType): Type of test set to use - ``"public"``, ``"private"``, + or ``"private_mixed"``. Only used when split is ``Split.TEST``. + - ``"public"``: Standard test set with ground truth masks + - ``"private"``: Unseen test set without ground truth + - ``"private_mixed"``: Unseen test set with mixed anomalies + Defaults to ``TestType.PUBLIC``. + + Example: + Create training dataset:: + + >>> from pathlib import Path + >>> dataset = MVTecAD2Dataset( + ... root=Path("./datasets/MVTec_AD_2"), + ... category="sheet_metal", + ... split="train" + ... ) + + Create validation dataset:: + + >>> val_dataset = MVTecAD2Dataset( + ... root=Path("./datasets/MVTec_AD_2"), + ... category="sheet_metal", + ... split="val" + ... 
) + + Create test datasets:: + + >>> # Public test set (with ground truth) + >>> test_dataset = MVTecAD2Dataset( + ... root=Path("./datasets/MVTec_AD_2"), + ... category="sheet_metal", + ... split="test", + ... test_type="public" + ... ) + + >>> # Private test set (without ground truth) + >>> private_dataset = MVTecAD2Dataset( + ... root=Path("./datasets/MVTec_AD_2"), + ... category="sheet_metal", + ... split="test", + ... test_type="private" + ... ) + + >>> # Private mixed test set (without ground truth) + >>> mixed_dataset = MVTecAD2Dataset( + ... root=Path("./datasets/MVTec_AD_2"), + ... category="sheet_metal", + ... split="test", + ... test_type="private_mixed" + ... ) + + Notes: + - The public test set contains both normal and anomalous samples with ground truth masks + - Private test sets (private and private_mixed) contain samples without ground truth + - Private test samples are labeled as "unknown" with label_index=-1 + """ + + def __init__( + self, + root: Path | str = "./datasets/MVTec_AD_2", + category: str = "sheet_metal", + augmentations: Transform | None = None, + split: str | Split | None = None, + test_type: TestType | str = TestType.PUBLIC, + ) -> None: + super().__init__(augmentations=augmentations) + + self.root_category = Path(root) / Path(category) + self.split = split + self.test_type = TestType(test_type) if isinstance(test_type, str) else test_type + self.samples = make_mvtec2_dataset( + self.root_category, + split=self.split, + test_type=self.test_type, + extensions=IMG_EXTENSIONS, + ) + + +def make_mvtec2_dataset( + root: str | Path, + split: str | Split | None = None, + test_type: TestType = TestType.PUBLIC, + extensions: Sequence[str] | None = None, +) -> DataFrame: + """Create MVTec AD 2 samples by parsing the data directory structure. 
+ + The files are expected to follow this structure:: + + root/ + ├── test_private/ + ├── test_private_mixed/ + ├── test_public/ + │ ├── bad/ + │ ├── good/ + │ └── ground_truth/ + │ └── bad/ + ├── train/ + │ └── good/ + └── validation/ + └── good/ + + Args: + root (Path | str): Path to dataset root directory + split (str | Split | None, optional): Dataset split (train, validation or test) + Defaults to ``None``. + test_type (TestType): Type of test set to use - public, private, or private_mixed. + Only used when split is test. Defaults to ``TestType.PUBLIC``. + extensions (Sequence[str] | None, optional): Valid file extensions + Defaults to ``None``. + + Returns: + DataFrame: Dataset samples with columns: + - path: Base path to dataset + - split: Dataset split (train/val/test) + - label: Class label + - image_path: Path to image file + - mask_path: Path to mask file (if available) + - label_index: Numeric label (0=normal, 1=abnormal, -1=unknown) + + Example: + >>> root = Path("./datasets/MVTec_AD_2/sheet_metal") + >>> samples = make_mvtec2_dataset(root, split="train") + >>> samples.head() + path split label image_path mask_path label_index + 0 datasets/MVTec_AD_2 train good [...]/good/105.png 0 + 1 datasets/MVTec_AD_2 train good [...]/good/017.png 0 + + Raises: + RuntimeError: If no valid images are found + MisMatchError: If anomalous images and masks don't match + """ + if extensions is None: + extensions = IMG_EXTENSIONS + + root = validate_path(root) + samples_list: list[tuple[str, str, str, str, str | None, int]] = [] + + # Get all image files + image_files = [f for f in root.glob("**/*") if f.suffix in extensions] + if not image_files: + msg = f"Found 0 images in {root}" + raise RuntimeError(msg) + + # Process training samples (only normal) + train_path = root / "train" / "good" + if train_path.exists(): + train_samples = [ + (str(root), "train", "good", str(f), None, 0) for f in train_path.glob(f"*[{''.join(extensions)}]") + ] +
samples_list.extend(train_samples) + + # Process validation samples (only normal) + val_path = root / "validation" / "good" + if val_path.exists(): + val_samples = [(str(root), "val", "good", str(f), None, 0) for f in val_path.glob(f"*[{''.join(extensions)}]")] + samples_list.extend(val_samples) + + # Process test samples based on test_type + if test_type == TestType.PUBLIC: + test_path = root / "test_public" + if test_path.exists(): + # Normal test samples + test_normal_path = test_path / "good" + test_normal_samples = [ + (str(root), "test", "good", str(f), None, 0) for f in test_normal_path.glob(f"*[{''.join(extensions)}]") + ] + samples_list.extend(test_normal_samples) + + # Abnormal test samples + test_abnormal_path = test_path / "bad" + if test_abnormal_path.exists(): + for image_path in test_abnormal_path.glob(f"*[{''.join(extensions)}]"): + # Add _mask suffix to the filename + mask_name = image_path.stem + "_mask" + image_path.suffix + mask_path = root / "test_public" / "ground_truth" / "bad" / mask_name + if not mask_path.exists(): + msg = f"Missing mask for anomalous image: {image_path}" + raise MisMatchError(msg) + samples_list.append( + (str(root), "test", "bad", str(image_path), str(mask_path), 1), + ) + elif test_type == TestType.PRIVATE: + test_path = root / "test_private" + if test_path.exists(): + # All samples in private test set are treated as unknown + test_samples = [ + (str(root), "test", "unknown", str(f), None, -1) for f in test_path.glob(f"*[{''.join(extensions)}]") + ] + samples_list.extend(test_samples) + elif test_type == TestType.PRIVATE_MIXED: + test_path = root / "test_private_mixed" + if test_path.exists(): + # All samples in private mixed test set are treated as unknown + test_samples = [ + (str(root), "test", "unknown", str(f), None, -1) for f in test_path.glob(f"*[{''.join(extensions)}]") + ] + samples_list.extend(test_samples) + + samples = DataFrame( + samples_list, + columns=["path", "split", "label", "image_path", "mask_path", 
"label_index"], + ) + + # Filter by split if specified + if split: + split = Split(split) if isinstance(split, str) else split + samples = samples[samples.split == split.value] + + samples.attrs["task"] = "segmentation" + return samples diff --git a/src/anomalib/data/utils/label.py b/src/anomalib/data/utils/label.py index ce12b8bfb2..f4da21da8d 100644 --- a/src/anomalib/data/utils/label.py +++ b/src/anomalib/data/utils/label.py @@ -3,6 +3,7 @@ This module defines an enumeration class for labeling data in anomaly detection tasks. The labels are represented as integers, where: +- ``UNKNOWN`` (-1): Represents samples with unknown/undefined labels - ``NORMAL`` (0): Represents normal/good samples - ``ABNORMAL`` (1): Represents anomalous/defective samples @@ -14,6 +15,9 @@ >>> label = LabelName.ABNORMAL >>> label.value 1 + >>> label = LabelName.UNKNOWN + >>> label.value + -1 """ # Copyright (C) 2023-2024 Intel Corporation @@ -30,9 +34,11 @@ class LabelName(int, Enum): names and their corresponding integer values. 
Attributes: + UNKNOWN (int): Label value -1, representing samples with unknown/undefined labels NORMAL (int): Label value 0, representing normal/good samples ABNORMAL (int): Label value 1, representing anomalous/defective samples """ + UNKNOWN = -1 NORMAL = 0 ABNORMAL = 1 diff --git a/src/anomalib/data/utils/split.py b/src/anomalib/data/utils/split.py index e2d9b5a6b3..cb5335b63f 100644 --- a/src/anomalib/data/utils/split.py +++ b/src/anomalib/data/utils/split.py @@ -80,6 +80,7 @@ class ValSplitMode(str, Enum): FROM_TRAIN: Split from training set FROM_TEST: Split from test set SYNTHETIC: Synthetic validation split + FROM_DIR: Use dedicated validation directory (for datasets that have one) """ NONE = "none" @@ -87,6 +88,7 @@ class ValSplitMode(str, Enum): FROM_TRAIN = "from_train" FROM_TEST = "from_test" SYNTHETIC = "synthetic" + FROM_DIR = "from_dir" def concatenate_datasets( diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 444c621bd9..a993b1d473 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -1,6 +1,6 @@ """Test Helpers - Dataset.""" -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -468,6 +468,72 @@ def _generate_dummy_visa_dataset(self) -> None: self.dataset_root = self.dataset_root.parent / "visa_pytorch" self._generate_dummy_mvtecad_dataset(normal_dir="good", abnormal_dir="bad", image_extension=".jpg") + def _generate_dummy_mvtecad2_dataset( + self, + normal_dir: str = "good", + abnormal_dir: str = "bad", + image_extension: str = ".png", + mask_suffix: str = "_mask", + mask_extension: str = ".png", + ) -> None: + """Generate a dummy MVTec AD 2 dataset. + + Args: + normal_dir (str, optional): Name of the normal directory. Defaults to "good". + abnormal_dir (str, optional): Name of the abnormal directory. Defaults to "bad". + image_extension (str, optional): Extension of the image files. Defaults to ".png". 
+ mask_suffix (str, optional): Suffix to append to mask filenames. Defaults to "_mask". + mask_extension (str, optional): Extension of the mask files. Defaults to ".png". + """ + # MVTec AD 2 has multiple subcategories within the dataset + dataset_category = "dummy" + category_root = self.dataset_root / dataset_category + + # Create train directory with normal images + train_path = category_root / "train" / normal_dir + for i in range(self.num_train): + image_path = train_path / f"{i:03d}_regular{image_extension}" + self.image_generator.generate_image(label=LabelName.NORMAL, image_filename=image_path) + + # Create validation directory with normal images + val_path = category_root / "validation" / normal_dir + for i in range(self.num_test): + image_path = val_path / f"{i:03d}_regular{image_extension}" + self.image_generator.generate_image(label=LabelName.NORMAL, image_filename=image_path) + + # Create public test directory with normal and abnormal images + test_public_path = category_root / "test_public" + + # Normal test images + test_normal_path = test_public_path / normal_dir + for i in range(self.num_test): + image_path = test_normal_path / f"{i:03d}_regular{image_extension}" + self.image_generator.generate_image(label=LabelName.NORMAL, image_filename=image_path) + + # Abnormal test images with masks + test_abnormal_path = test_public_path / abnormal_dir + test_mask_path = test_public_path / "ground_truth" / abnormal_dir + for i in range(self.num_test): + image_path = test_abnormal_path / f"{i:03d}_regular{image_extension}" + mask_path = test_mask_path / f"{i:03d}_regular{mask_suffix}{mask_extension}" + self.image_generator.generate_image( + label=LabelName.ABNORMAL, + image_filename=image_path, + mask_filename=mask_path, + ) + + # Create private test directory with unknown images + test_private_path = category_root / "test_private" + for i in range(self.num_test): + image_path = test_private_path / f"{i:03d}_regular{image_extension}" + 
self.image_generator.generate_image(label=LabelName.NORMAL, image_filename=image_path) + + # Create private mixed test directory with unknown images + test_private_mixed_path = category_root / "test_private_mixed" + for i in range(self.num_test): + image_path = test_private_mixed_path / f"{i:03d}_regular{image_extension}" + self.image_generator.generate_image(label=LabelName.NORMAL, image_filename=image_path) + class DummyVideoDatasetGenerator(DummyDatasetGenerator): """Dummy video dataset generator. diff --git a/tests/unit/data/datamodule/image/test_mvtecad2.py b/tests/unit/data/datamodule/image/test_mvtecad2.py new file mode 100644 index 0000000000..cc521c43e5 --- /dev/null +++ b/tests/unit/data/datamodule/image/test_mvtecad2.py @@ -0,0 +1,63 @@ +"""Unit tests - MVTec AD 2 Datamodule.""" + +# Copyright (C) 2025 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +from pathlib import Path + +import pytest +from torchvision.transforms.v2 import Resize + +from anomalib.data import MVTecAD2 +from anomalib.data.datasets.image.mvtecad2 import TestType +from tests.unit.data.datamodule.base.image import _TestAnomalibImageDatamodule + + +class TestMVTecAD2(_TestAnomalibImageDatamodule): + """MVTec AD 2 Datamodule Unit Tests.""" + + @pytest.fixture() + @staticmethod + def datamodule(dataset_path: Path) -> MVTecAD2: + """Create and return a MVTec AD 2 datamodule.""" + _datamodule = MVTecAD2( + root=dataset_path / "mvtecad2", + category="dummy", + train_batch_size=4, + eval_batch_size=4, + augmentations=Resize((256, 256)), + ) + _datamodule.setup() + + return _datamodule + + @pytest.fixture() + @staticmethod + def fxt_data_config_path() -> str: + """Return the path to the test data config.""" + return "examples/configs/data/mvtecad2.yaml" + + @staticmethod + def test_test_types(datamodule: MVTecAD2) -> None: + """Test that the datamodule can handle different test types.""" + # Test public test set + public_loader = datamodule.test_dataloader(test_type=TestType.PUBLIC) 
+ assert public_loader is not None + batch = next(iter(public_loader)) + assert batch.image.shape == (4, 3, 256, 256) + + # Test private test set + private_loader = datamodule.test_dataloader(test_type=TestType.PRIVATE) + assert private_loader is not None + batch = next(iter(private_loader)) + assert batch.image.shape == (4, 3, 256, 256) + + # Test private mixed test set + mixed_loader = datamodule.test_dataloader(test_type=TestType.PRIVATE_MIXED) + assert mixed_loader is not None + batch = next(iter(mixed_loader)) + assert batch.image.shape == (4, 3, 256, 256) + + # Test invalid test type + with pytest.raises(ValueError, match="'invalid' is not a valid TestType"): + datamodule.test_dataloader(test_type="invalid") From 1f10185e820349423fecd2904ccbb7b2e0eeeebb Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 12 Feb 2025 12:47:13 +0000 Subject: [PATCH 08/19] Fix engine unit tests Signed-off-by: Samet Akcay --- src/anomalib/models/image/efficient_ad/README.md | 2 +- tests/unit/engine/test_engine.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/anomalib/models/image/efficient_ad/README.md b/src/anomalib/models/image/efficient_ad/README.md index 3344732ea6..30fd2c9662 100644 --- a/src/anomalib/models/image/efficient_ad/README.md +++ b/src/anomalib/models/image/efficient_ad/README.md @@ -18,7 +18,7 @@ Anomalies are detected as the difference in output feature maps between the teac ## Usage -`anomalib train --model EfficientAd --data anomalib.data.MVTec --data.category --data.train_batch_size 1` +`anomalib train --model EfficientAd --data anomalib.data.MVTecAD --data.category --data.train_batch_size 1` ## Benchmark diff --git a/tests/unit/engine/test_engine.py b/tests/unit/engine/test_engine.py index e38c01e634..cc027ef5a5 100644 --- a/tests/unit/engine/test_engine.py +++ b/tests/unit/engine/test_engine.py @@ -76,7 +76,7 @@ def fxt_full_config_path(tmp_path: Path) -> Path: pre_trained: true n_features: null data: - class_path: 
anomalib.data.MVTec + class_path: anomalib.data.MVTecAD init_args: root: datasets/MVTecAD category: bottle From 871352813ef981446f07c301f0eb6240c02a83e9 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 13 Feb 2025 14:56:46 +0000 Subject: [PATCH 09/19] Fix mvtec path in inference tests Signed-off-by: Samet Akcay --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0b59140010..bdb35f3dbb 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTecAD(root=dataset_path / "MVTecAD", category="dummy") + dataset = MVTecAD(root=dataset_path / "mvtecad", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path From f6506295bbeea9747d928b8c6e62a6005c70fce8 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Wed, 5 Mar 2025 18:06:26 +0000 Subject: [PATCH 10/19] Rename MVTec dataset path in tests Signed-off-by: Samet Akcay --- src/anomalib/data/datasets/image/mvtecad.py | 4 ++-- src/anomalib/data/datasets/image/visa.py | 4 ++-- tests/conftest.py | 2 +- tests/helpers/data.py | 4 +++- tests/unit/data/datamodule/image/test_folder.py | 2 +- tests/unit/data/datamodule/image/test_mvtec_ad.py | 2 +- tests/unit/data/utils/test_image.py | 4 ++-- tests/unit/data/utils/test_path.py | 4 ++-- tests/unit/data/utils/test_synthetic.py | 2 +- .../callbacks/visualizer_callback/dummy_lightning_model.py | 2 +- .../utils/callbacks/visualizer_callback/test_visualizer.py | 2 +- tests/unit/utils/test_visualizer.py | 4 ++-- 12 files changed, 19 insertions(+), 17 deletions(-) diff --git a/src/anomalib/data/datasets/image/mvtecad.py b/src/anomalib/data/datasets/image/mvtecad.py index 9c78779172..192f8260e6 100644 --- a/src/anomalib/data/datasets/image/mvtecad.py +++ b/src/anomalib/data/datasets/image/mvtecad.py @@ -114,14 +114,14 @@ def __init__( self.root_category = Path(root) / Path(category) 
self.category = category self.split = split - self.samples = make_mvtec_dataset( + self.samples = make_mvtec_ad_dataset( self.root_category, split=self.split, extensions=IMG_EXTENSIONS, ) -def make_mvtec_dataset( +def make_mvtec_ad_dataset( root: str | Path, split: str | Split | None = None, extensions: Sequence[str] | None = None, diff --git a/src/anomalib/data/datasets/image/visa.py b/src/anomalib/data/datasets/image/visa.py index ffcc351381..2e2f79d118 100644 --- a/src/anomalib/data/datasets/image/visa.py +++ b/src/anomalib/data/datasets/image/visa.py @@ -28,7 +28,7 @@ from torchvision.transforms.v2 import Transform from anomalib.data.datasets import AnomalibDataset -from anomalib.data.datasets.image.mvtecad import make_mvtec_dataset +from anomalib.data.datasets.image.mvtecad import make_mvtec_ad_dataset from anomalib.data.utils import Split EXTENSIONS = (".png", ".jpg", ".JPG") @@ -86,7 +86,7 @@ def __init__( self.root_category = Path(root) / category self.split = split - self.samples = make_mvtec_dataset( + self.samples = make_mvtec_ad_dataset( self.root_category, split=self.split, extensions=EXTENSIONS, diff --git a/tests/conftest.py b/tests/conftest.py index bdb35f3dbb..0b59140010 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTecAD(root=dataset_path / "mvtecad", category="dummy") + dataset = MVTecAD(root=dataset_path / "MVTecAD", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 444c621bd9..67d26f80c3 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -298,7 +298,7 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): def __init__( self, - data_format: DataFormat | str = "MVTecAD", + data_format: DataFormat | str = "mvtecad", root: Path | str | None = None, normal_category: str = "good", abnormal_category: str = "bad", @@ -369,6 +369,8 
@@ def _generate_dummy_mvtecad_dataset( mask_extension: str = ".png", ) -> None: """Generates dummy MVTecAD dataset in a temporary directory using the same convention as MVTec AD.""" + # Replace the default mvtecad dataset path to MVTecAD + self.dataset_root = self.dataset_root.with_name("MVTecAD") # MVTec has multiple subcategories within the dataset. dataset_category = "dummy" diff --git a/tests/unit/data/datamodule/image/test_folder.py b/tests/unit/data/datamodule/image/test_folder.py index 7ecb2c0e64..940a329bac 100644 --- a/tests/unit/data/datamodule/image/test_folder.py +++ b/tests/unit/data/datamodule/image/test_folder.py @@ -28,7 +28,7 @@ def datamodule(dataset_path: Path) -> Folder: # Create and prepare the dataset _datamodule = Folder( name="dummy", - root=dataset_path / "mvtecad" / "dummy", + root=dataset_path / "MVTecAD" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/data/datamodule/image/test_mvtec_ad.py b/tests/unit/data/datamodule/image/test_mvtec_ad.py index 89f973c5bb..43dfac4f73 100644 --- a/tests/unit/data/datamodule/image/test_mvtec_ad.py +++ b/tests/unit/data/datamodule/image/test_mvtec_ad.py @@ -20,7 +20,7 @@ class TestMVTecAD(_TestAnomalibImageDatamodule): def datamodule(dataset_path: Path) -> MVTecAD: """Create and return a MVTec datamodule.""" _datamodule = MVTecAD( - root=dataset_path / "mvtecad", + root=dataset_path / "MVTecAD", category="dummy", train_batch_size=4, eval_batch_size=4, diff --git a/tests/unit/data/utils/test_image.py b/tests/unit/data/utils/test_image.py index ac3ab1def3..b0e55a10ed 100644 --- a/tests/unit/data/utils/test_image.py +++ b/tests/unit/data/utils/test_image.py @@ -16,14 +16,14 @@ class TestGetImageFilenames: @staticmethod def test_existing_image_file(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct path for an existing image file.""" - image_path = dataset_path / "mvtecad/dummy/train/good/000.png" + image_path = 
dataset_path / "MVTecAD/dummy/train/good/000.png" image_filenames = get_image_filenames(image_path) assert image_filenames == [image_path.resolve()] @staticmethod def test_existing_image_directory(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct image filenames from an existing directory.""" - directory_path = dataset_path / "mvtecad/dummy/train/good" + directory_path = dataset_path / "MVTecAD/dummy/train/good" image_filenames = get_image_filenames(directory_path) expected_filenames = [(directory_path / f"{i:03d}.png").resolve() for i in range(5)] assert set(image_filenames) == set(expected_filenames) diff --git a/tests/unit/data/utils/test_path.py b/tests/unit/data/utils/test_path.py index 55a6f8b062..f0fba19ea0 100644 --- a/tests/unit/data/utils/test_path.py +++ b/tests/unit/data/utils/test_path.py @@ -35,14 +35,14 @@ def test_contains_non_printable_characters() -> None: @staticmethod def test_existing_file_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing file within the base directory.""" - file_path = dataset_path / "mvtecad/dummy/train/good/000.png" + file_path = dataset_path / "MVTecAD/dummy/train/good/000.png" validated_path = validate_path(file_path, base_dir=dataset_path) assert validated_path == file_path.resolve() @staticmethod def test_existing_directory_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing directory within the base directory.""" - directory_path = dataset_path / "mvtecad/dummy/train/good" + directory_path = dataset_path / "MVTecAD/dummy/train/good" validated_path = validate_path(directory_path, base_dir=dataset_path) assert validated_path == directory_path.resolve() diff --git a/tests/unit/data/utils/test_synthetic.py b/tests/unit/data/utils/test_synthetic.py index cc8a539e79..a5b31b75b4 100644 --- a/tests/unit/data/utils/test_synthetic.py +++ 
b/tests/unit/data/utils/test_synthetic.py @@ -18,7 +18,7 @@ def folder_dataset(dataset_path: Path) -> FolderDataset: """Fixture that returns a FolderDataset instance.""" return FolderDataset( name="dummy", - root=dataset_path / "mvtecad" / "dummy", + root=dataset_path / "MVTecAD" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py index dfbee026e7..a7e5f4d211 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py +++ b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py @@ -50,7 +50,7 @@ def test_step(self, *_, **__) -> ImageBatch: """Only used to trigger on_test_epoch_end.""" self.log(name="loss", value=0.0, prog_bar=True) return ImageBatch( - image_path=[Path(self.dataset_path / "mvtecad" / "dummy" / "train" / "good" / "000.png")], + image_path=[Path(self.dataset_path / "MVTecAD" / "dummy" / "train" / "good" / "000.png")], image=torch.rand((1, 3, 100, 100)).to(self.device), gt_mask=torch.zeros((1, 100, 100)).to(self.device), anomaly_map=torch.ones((1, 100, 100)).to(self.device), diff --git a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py index 07f7c71b1a..849aa1cf68 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py +++ b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py @@ -24,7 +24,7 @@ def test_add_images(dataset_path: Path) -> None: limit_test_batches=1, accelerator="cpu", ) - engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtecad", category="dummy")) + engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "MVTecAD", category="dummy")) # test if images are logged assert len(list(Path(dir_loc).glob("**/*.png"))) >= 1, "Failed to save to local path" diff --git 
a/tests/unit/utils/test_visualizer.py b/tests/unit/utils/test_visualizer.py index 7476cf4943..00b646b4ed 100644 --- a/tests/unit/utils/test_visualizer.py +++ b/tests/unit/utils/test_visualizer.py @@ -50,9 +50,9 @@ def test_model_visualizer_mode( fast_dev_run=True, devices=1, ) - datamodule = MVTecAD(root=dataset_path / "mvtecad", category="dummy") + datamodule = MVTecAD(root=dataset_path / "MVTecAD", category="dummy") engine.test(model=model, datamodule=datamodule, ckpt_path=str(_ckpt_path)) - dataset = PredictDataset(path=dataset_path / "mvtecad" / "dummy" / "test") + dataset = PredictDataset(path=dataset_path / "MVTecAD" / "dummy" / "test") datamodule = DataLoader(dataset, collate_fn=ImageBatch.collate) engine.predict(model=model, dataloaders=datamodule, ckpt_path=str(_ckpt_path)) From 786c0c337e92fa6de0b53cb43b9cc06556682f68 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 6 Mar 2025 07:36:36 +0000 Subject: [PATCH 11/19] Fix mvtec path in conftest Signed-off-by: Samet Akcay --- tests/conftest.py | 4 ++-- tests/helpers/data.py | 20 +++++++++----------- tests/integration/cli/test_cli.py | 4 ++-- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/tests/conftest.py b/tests/conftest.py index 0b59140010..281dbb5d54 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,6 @@ """Fixtures for the entire test suite.""" -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import shutil @@ -95,7 +95,7 @@ def checkpoint(model_name: str) -> Path: max_epochs=1, devices=1, ) - dataset = MVTecAD(root=dataset_path / "MVTecAD", category="dummy") + dataset = MVTecAD(root=dataset_path / "mvtecad", category="dummy") engine.fit(model=model, datamodule=dataset) return _ckpt_path diff --git a/tests/helpers/data.py b/tests/helpers/data.py index 67d26f80c3..9e09516b15 100644 --- a/tests/helpers/data.py +++ b/tests/helpers/data.py @@ -1,6 +1,6 @@ """Test Helpers - Dataset.""" -# 
Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations @@ -277,23 +277,23 @@ class DummyImageDatasetGenerator(DummyDatasetGenerator): Examples: To create an MVTecAD dataset with 10 training images and 10 testing images per category, use the following code. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="MVTecAD", num_train=10, num_test=10) + >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtecad", num_train=10, num_test=10) >>> dataset_generator.generate_dataset() In order to provide a specific directory to save the dataset, use the ``root`` argument. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="MVTecAD", root="./datasets/dummy") + >>> dataset_generator = DummyImageDatasetGenerator(data_format="mvtecad", root="./datasets/dummy") >>> dataset_generator.generate_dataset() It is also possible to use the generator as a context manager. - >>> with DummyImageDatasetGenerator(data_format="MVTecAD", num_train=10, num_test=10) as dataset_path: + >>> with DummyImageDatasetGenerator(data_format="mvtecad", num_train=10, num_test=10) as dataset_path: >>> some_function() - To get the list of available datasets, use the ``DataFormat`` enum. - >>> from anomalib.data import DataFormat - >>> print(list(DataFormat)) + To get the list of available image datasets, use the ``ImageDataFormat`` enum. + >>> from anomalib.data import ImageDataFormat + >>> print(list(ImageDataFormat)) - Then you can use the ``DataFormat`` enum to generate the dataset. - >>> dataset_generator = DummyImageDatasetGenerator(data_format="beantech", num_train=10, num_test=10) + Then you can use the ``ImageDataFormat`` enum to generate the dataset. 
+ >>> dataset_generator = DummyImageDatasetGenerator(data_format="btech", num_train=10, num_test=10) """ def __init__( @@ -369,8 +369,6 @@ def _generate_dummy_mvtecad_dataset( mask_extension: str = ".png", ) -> None: """Generates dummy MVTecAD dataset in a temporary directory using the same convention as MVTec AD.""" - # Replace the default mvtecad dataset path to MVTecAD - self.dataset_root = self.dataset_root.with_name("MVTecAD") # MVTec has multiple subcategories within the dataset. dataset_category = "dummy" diff --git a/tests/integration/cli/test_cli.py b/tests/integration/cli/test_cli.py index e882e49ef2..7310e811a6 100644 --- a/tests/integration/cli/test_cli.py +++ b/tests/integration/cli/test_cli.py @@ -3,7 +3,7 @@ This just checks if one of the model works end-to-end. The rest of the models are checked using the API. """ -# Copyright (C) 2023-2024 Intel Corporation +# Copyright (C) 2023-2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 from pathlib import Path @@ -191,7 +191,7 @@ def _get_common_cli_args(dataset_path: Path | None, project_path: Path) -> list[ # We need to set the predict dataloader as MVTec AD and UCSDped do not # have predict_dataloader attribute defined. 
if dataset_path: - data_root = f"{dataset_path}/MVTecAD" + data_root = f"{dataset_path}/mvtecad" dataclass = "MVTecAD" data_args = [ "--data", From 0ce1810f6d23752633b4793a652dd46880a8801f Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 6 Mar 2025 08:21:54 +0000 Subject: [PATCH 12/19] remove redundant relative import Signed-off-by: Samet Akcay --- src/anomalib/data/datamodules/image/__init__.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/anomalib/data/datamodules/image/__init__.py b/src/anomalib/data/datamodules/image/__init__.py index 1dda736220..f817d4c948 100644 --- a/src/anomalib/data/datamodules/image/__init__.py +++ b/src/anomalib/data/datamodules/image/__init__.py @@ -64,7 +64,6 @@ class ImageDataFormat(str, Enum): "Folder", "Kolektor", "MVTecAD", - "MVTecAD", # Include both for backward compatibility + "MVTec", # Include both for backward compatibility "Visa", - "MVTec", ] From a13ad014fe93d5014601aed59681fcb5eba82ef2 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 6 Mar 2025 10:04:35 +0000 Subject: [PATCH 13/19] Increase the timeout Signed-off-by: Samet Akcay --- .github/workflows/_reusable-test-suite.yaml | 2 +- .github/workflows/pr.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_reusable-test-suite.yaml b/.github/workflows/_reusable-test-suite.yaml index 3daac10189..461f0adddd 100644 --- a/.github/workflows/_reusable-test-suite.yaml +++ b/.github/workflows/_reusable-test-suite.yaml @@ -80,7 +80,7 @@ on: timeout: description: "Test timeout in minutes" type: number - default: 10 + default: 30 enable-cache: description: "Enable pip caching" type: string diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 2006120fc2..89ccb25204 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -61,7 +61,7 @@ jobs: with: test-type: "unit" runner: "ubuntu-latest" - timeout: 15 + timeout: 30 enable-cache: "true" secrets: codecov-token: ${{ 
secrets.CODECOV_TOKEN }} From f526a473ce974cfe0b728c6d068006b269fc2cc4 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 6 Mar 2025 10:11:05 +0000 Subject: [PATCH 14/19] Fix unit tests Signed-off-by: Samet Akcay --- .github/workflows/_reusable-test-suite.yaml | 2 +- .github/workflows/pr.yaml | 2 +- tests/unit/data/datamodule/image/test_folder.py | 2 +- tests/unit/data/datamodule/image/test_mvtec_ad.py | 2 +- tests/unit/data/utils/test_image.py | 4 ++-- tests/unit/data/utils/test_path.py | 4 ++-- tests/unit/data/utils/test_synthetic.py | 2 +- .../callbacks/visualizer_callback/dummy_lightning_model.py | 2 +- .../utils/callbacks/visualizer_callback/test_visualizer.py | 2 +- tests/unit/utils/test_visualizer.py | 4 ++-- 10 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/_reusable-test-suite.yaml b/.github/workflows/_reusable-test-suite.yaml index 461f0adddd..3daac10189 100644 --- a/.github/workflows/_reusable-test-suite.yaml +++ b/.github/workflows/_reusable-test-suite.yaml @@ -80,7 +80,7 @@ on: timeout: description: "Test timeout in minutes" type: number - default: 30 + default: 10 enable-cache: description: "Enable pip caching" type: string diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 89ccb25204..2006120fc2 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -61,7 +61,7 @@ jobs: with: test-type: "unit" runner: "ubuntu-latest" - timeout: 30 + timeout: 15 enable-cache: "true" secrets: codecov-token: ${{ secrets.CODECOV_TOKEN }} diff --git a/tests/unit/data/datamodule/image/test_folder.py b/tests/unit/data/datamodule/image/test_folder.py index 940a329bac..7ecb2c0e64 100644 --- a/tests/unit/data/datamodule/image/test_folder.py +++ b/tests/unit/data/datamodule/image/test_folder.py @@ -28,7 +28,7 @@ def datamodule(dataset_path: Path) -> Folder: # Create and prepare the dataset _datamodule = Folder( name="dummy", - root=dataset_path / "MVTecAD" / "dummy", + root=dataset_path / "mvtecad" / 
"dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/data/datamodule/image/test_mvtec_ad.py b/tests/unit/data/datamodule/image/test_mvtec_ad.py index 43dfac4f73..89f973c5bb 100644 --- a/tests/unit/data/datamodule/image/test_mvtec_ad.py +++ b/tests/unit/data/datamodule/image/test_mvtec_ad.py @@ -20,7 +20,7 @@ class TestMVTecAD(_TestAnomalibImageDatamodule): def datamodule(dataset_path: Path) -> MVTecAD: """Create and return a MVTec datamodule.""" _datamodule = MVTecAD( - root=dataset_path / "MVTecAD", + root=dataset_path / "mvtecad", category="dummy", train_batch_size=4, eval_batch_size=4, diff --git a/tests/unit/data/utils/test_image.py b/tests/unit/data/utils/test_image.py index b0e55a10ed..ac3ab1def3 100644 --- a/tests/unit/data/utils/test_image.py +++ b/tests/unit/data/utils/test_image.py @@ -16,14 +16,14 @@ class TestGetImageFilenames: @staticmethod def test_existing_image_file(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct path for an existing image file.""" - image_path = dataset_path / "MVTecAD/dummy/train/good/000.png" + image_path = dataset_path / "mvtecad/dummy/train/good/000.png" image_filenames = get_image_filenames(image_path) assert image_filenames == [image_path.resolve()] @staticmethod def test_existing_image_directory(dataset_path: Path) -> None: """Test ``get_image_filenames`` returns the correct image filenames from an existing directory.""" - directory_path = dataset_path / "MVTecAD/dummy/train/good" + directory_path = dataset_path / "mvtecad/dummy/train/good" image_filenames = get_image_filenames(directory_path) expected_filenames = [(directory_path / f"{i:03d}.png").resolve() for i in range(5)] assert set(image_filenames) == set(expected_filenames) diff --git a/tests/unit/data/utils/test_path.py b/tests/unit/data/utils/test_path.py index f0fba19ea0..55a6f8b062 100644 --- a/tests/unit/data/utils/test_path.py +++ b/tests/unit/data/utils/test_path.py @@ 
-35,14 +35,14 @@ def test_contains_non_printable_characters() -> None: @staticmethod def test_existing_file_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing file within the base directory.""" - file_path = dataset_path / "MVTecAD/dummy/train/good/000.png" + file_path = dataset_path / "mvtecad/dummy/train/good/000.png" validated_path = validate_path(file_path, base_dir=dataset_path) assert validated_path == file_path.resolve() @staticmethod def test_existing_directory_within_base_dir(dataset_path: Path) -> None: """Test ``validate_path`` returns the validated path for an existing directory within the base directory.""" - directory_path = dataset_path / "MVTecAD/dummy/train/good" + directory_path = dataset_path / "mvtecad/dummy/train/good" validated_path = validate_path(directory_path, base_dir=dataset_path) assert validated_path == directory_path.resolve() diff --git a/tests/unit/data/utils/test_synthetic.py b/tests/unit/data/utils/test_synthetic.py index a5b31b75b4..cc8a539e79 100644 --- a/tests/unit/data/utils/test_synthetic.py +++ b/tests/unit/data/utils/test_synthetic.py @@ -18,7 +18,7 @@ def folder_dataset(dataset_path: Path) -> FolderDataset: """Fixture that returns a FolderDataset instance.""" return FolderDataset( name="dummy", - root=dataset_path / "MVTecAD" / "dummy", + root=dataset_path / "mvtecad" / "dummy", normal_dir="train/good", abnormal_dir="test/bad", normal_test_dir="test/good", diff --git a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py index a7e5f4d211..dfbee026e7 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py +++ b/tests/unit/utils/callbacks/visualizer_callback/dummy_lightning_model.py @@ -50,7 +50,7 @@ def test_step(self, *_, **__) -> ImageBatch: """Only used to trigger on_test_epoch_end.""" self.log(name="loss", value=0.0, prog_bar=True) return 
ImageBatch( - image_path=[Path(self.dataset_path / "MVTecAD" / "dummy" / "train" / "good" / "000.png")], + image_path=[Path(self.dataset_path / "mvtecad" / "dummy" / "train" / "good" / "000.png")], image=torch.rand((1, 3, 100, 100)).to(self.device), gt_mask=torch.zeros((1, 100, 100)).to(self.device), anomaly_map=torch.ones((1, 100, 100)).to(self.device), diff --git a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py index 849aa1cf68..07f7c71b1a 100644 --- a/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py +++ b/tests/unit/utils/callbacks/visualizer_callback/test_visualizer.py @@ -24,7 +24,7 @@ def test_add_images(dataset_path: Path) -> None: limit_test_batches=1, accelerator="cpu", ) - engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "MVTecAD", category="dummy")) + engine.test(model=model, datamodule=MVTecAD(root=dataset_path / "mvtecad", category="dummy")) # test if images are logged assert len(list(Path(dir_loc).glob("**/*.png"))) >= 1, "Failed to save to local path" diff --git a/tests/unit/utils/test_visualizer.py b/tests/unit/utils/test_visualizer.py index 00b646b4ed..7476cf4943 100644 --- a/tests/unit/utils/test_visualizer.py +++ b/tests/unit/utils/test_visualizer.py @@ -50,9 +50,9 @@ def test_model_visualizer_mode( fast_dev_run=True, devices=1, ) - datamodule = MVTecAD(root=dataset_path / "MVTecAD", category="dummy") + datamodule = MVTecAD(root=dataset_path / "mvtecad", category="dummy") engine.test(model=model, datamodule=datamodule, ckpt_path=str(_ckpt_path)) - dataset = PredictDataset(path=dataset_path / "MVTecAD" / "dummy" / "test") + dataset = PredictDataset(path=dataset_path / "mvtecad" / "dummy" / "test") datamodule = DataLoader(dataset, collate_fn=ImageBatch.collate) engine.predict(model=model, dataloaders=datamodule, ckpt_path=str(_ckpt_path)) From 609250422038126eb32d6f11664e89a75fdd7769 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: 
Thu, 6 Mar 2025 14:41:44 +0000 Subject: [PATCH 15/19] Fix mvtec2 config file example Signed-off-by: Samet Akcay --- examples/configs/data/mvtecad2.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/configs/data/mvtecad2.yaml b/examples/configs/data/mvtecad2.yaml index 41ce5b9379..dcd8dd73a1 100644 --- a/examples/configs/data/mvtecad2.yaml +++ b/examples/configs/data/mvtecad2.yaml @@ -5,7 +5,5 @@ init_args: train_batch_size: 32 eval_batch_size: 32 num_workers: 8 - test_split_mode: from_dir - val_split_mode: from_dir test_type: "public" seed: null From 11a43cf4d23cd15ce7a30e48cf67968231e41dc1 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Thu, 6 Mar 2025 16:56:10 +0000 Subject: [PATCH 16/19] Update the test split docstring Signed-off-by: Samet Akcay --- examples/api/02_data/mvtecad2.py | 2 +- .../data/datamodules/image/mvtecad2.py | 28 +++++++++----- src/anomalib/data/datasets/image/mvtecad2.py | 38 ++++++++++--------- 3 files changed, 40 insertions(+), 28 deletions(-) diff --git a/examples/api/02_data/mvtecad2.py b/examples/api/02_data/mvtecad2.py index b450838f55..f241983696 100644 --- a/examples/api/02_data/mvtecad2.py +++ b/examples/api/02_data/mvtecad2.py @@ -7,7 +7,7 @@ 4. 
Work with custom transforms and visualization """ -# Copyright (C) 2024 Intel Corporation +# Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 import matplotlib.pyplot as plt diff --git a/src/anomalib/data/datamodules/image/mvtecad2.py b/src/anomalib/data/datamodules/image/mvtecad2.py index 729fccbeb9..ba7934207a 100644 --- a/src/anomalib/data/datamodules/image/mvtecad2.py +++ b/src/anomalib/data/datamodules/image/mvtecad2.py @@ -6,12 +6,15 @@ The dataset provides three different test sets: - Public test set (test_public/): Contains both normal and anomalous samples with ground truth masks - - Private test set (test_private/): Contains unseen test samples without ground truth - - Private mixed test set (test_private_mixed/): Contains unseen test samples - with mixed anomalies without ground truth + for facilitating local testing and initial performance estimation + - Private test set (test_private/): Official unseen test set without ground truth + for entering the leaderboard + - Private mixed test set (test_private_mixed/): Contains unseen test samples captured + under seen and unseen lighting conditions (mixed randomly) without ground truth -The public test set is used for standard evaluation, while the private test sets -are used for real-world evaluation scenarios where ground truth is not available. +The public test set is meant for local evaluation, while the private test sets +are the official test sets for entering the leaderboard on the evaluation server +(https://benchmark.mvtec.com/). License: MVTec AD 2 dataset is released under the Creative Commons @@ -61,9 +64,13 @@ class MVTecAD2(AnomalibDataModule): Defaults to ``None``. augmentations (Transform | None): General augmentations to apply if stage-specific augmentations are not provided. - test_type (str | TestType): Type of test set to use - ``"public"``, ``"private"``, - or ``"private_mixed"``. This determines which test set is returned by - test_dataloader(). 
Defaults to ``TestType.PUBLIC``. + test_type (str | TestType): Type of test set to use: + - ``"public"``: Test set with ground truth for local evaluation and initial + performance estimation + - ``"private"``: Official test set without ground truth for leaderboard submission + - ``"private_mixed"``: Official test set with mixed lighting conditions (seen and + unseen, randomly mixed) for leaderboard submission + Defaults to ``TestType.PUBLIC``. seed (int | None, optional): Random seed for reproducibility. Defaults to ``None``. @@ -182,7 +189,10 @@ def test_dataloader(self, test_type: str | TestType | None = None) -> EVAL_DATAL """Get test dataloader for the specified test type. Args: - test_type (str | TestType | None, optional): Type of test set to use. + test_type (str | TestType | None, optional): Type of test set to use: + - ``"public"``: Test set with ground truth for local evaluation + - ``"private"``: Official test set without ground truth for leaderboard + - ``"private_mixed"``: Official test set with mixed lighting conditions If None, uses the test_type specified in __init__. Defaults to None. diff --git a/src/anomalib/data/datasets/image/mvtecad2.py b/src/anomalib/data/datasets/image/mvtecad2.py index 03f994caa3..1070225703 100644 --- a/src/anomalib/data/datasets/image/mvtecad2.py +++ b/src/anomalib/data/datasets/image/mvtecad2.py @@ -44,14 +44,16 @@ class TestType(str, Enum): """Type of test set to use. 
The MVTec AD 2 dataset provides three different test sets: - - PUBLIC: Standard test set with ground truth masks for evaluation - - PRIVATE: Unseen test set without ground truth for real-world testing - - PRIVATE_MIXED: Unseen test set with mixed anomalies without ground truth + - PUBLIC: Test set with ground truth masks for facilitating local testing and initial performance estimation + - PRIVATE: Official unseen test set without ground truth for entering the leaderboard + - PRIVATE_MIXED: Official unseen test set captured under seen and unseen lighting conditions (mixed randomly) + + Official evaluation server: https://benchmark.mvtec.com/ """ - PUBLIC = "public" # Test set with ground truth - PRIVATE = "private" # Private test set without ground truth - PRIVATE_MIXED = "private_mixed" # Private test set with mixed anomalies + PUBLIC = "public" # Test set with ground truth for local evaluation + PRIVATE = "private" # Official private test set without ground truth + PRIVATE_MIXED = "private_mixed" # Official private test set with mixed lighting conditions IMG_EXTENSIONS = (".png", ".PNG") @@ -79,11 +81,10 @@ class MVTecAD2Dataset(AnomalibDataset): Defaults to ``None``. split (str | Split | None): Dataset split - usually ``Split.TRAIN``, ``Split.VAL``, or ``Split.TEST``. Defaults to ``None``. - test_type (str | TestType): Type of test set to use - ``"public"``, ``"private"``, - or ``"private_mixed"``. Only used when split is ``Split.TEST``. 
- - ``"public"``: Standard test set with ground truth masks - - ``"private"``: Unseen test set without ground truth - - ``"private_mixed"``: Unseen test set with mixed anomalies + test_type (str | TestType): Type of test set to use - only used when split is ``Split.TEST``: + - ``"public"``: Test set with ground truth for local evaluation and initial performance estimation + - ``"private"``: Official test set without ground truth for leaderboard submission + - ``"private_mixed"``: Official test set with mixed lighting conditions (seen and unseen lighting) Defaults to ``TestType.PUBLIC``. Example: @@ -181,13 +182,14 @@ def make_mvtec2_dataset( └── good/ Args: - root (Path | str): Path to dataset root directory - split (str | Split | None, optional): Dataset split (train, validation or test) - Defaults to ``None``. - test_type (TestType): Type of test set to use - public, private, or private_mixed. - Only used when split is test. Defaults to ``TestType.PUBLIC``. - extensions (Sequence[str] | None, optional): Valid file extensions - Defaults to ``None``. + root (str | Path): Path to the dataset root directory + split (str | Split | None, optional): Dataset split (train, val, test). Defaults to None. + test_type (TestType, optional): Type of test set to use for testing: + - PUBLIC: Test set with ground truth (for local evaluation) + - PRIVATE: Official test set without ground truth (for leaderboard) + - PRIVATE_MIXED: Official test set with mixed lighting conditions (for leaderboard) + Defaults to TestType.PUBLIC. + extensions (Sequence[str] | None, optional): Image extensions to include. Defaults to None. 
Returns: DataFrame: Dataset samples with columns: From afc4d01ae98f1f929e6875e95af6bbf1bd0cb3a4 Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Tue, 1 Apr 2025 12:24:51 +0100 Subject: [PATCH 17/19] Do not create empty masks for unknown labels Signed-off-by: Samet Akcay --- src/anomalib/data/datasets/base/image.py | 56 +++++++++++++-------- src/anomalib/data/datasets/image/mvtecad.py | 2 +- 2 files changed, 36 insertions(+), 22 deletions(-) diff --git a/src/anomalib/data/datasets/base/image.py b/src/anomalib/data/datasets/base/image.py index 4cd659e1c0..b425f32c1a 100644 --- a/src/anomalib/data/datasets/base/image.py +++ b/src/anomalib/data/datasets/base/image.py @@ -264,30 +264,44 @@ def __getitem__(self, index: int) -> DatasetItem: mask_path = self.samples.iloc[index].mask_path label_index = self.samples.iloc[index].label_index + # Read the image image = read_image(image_path, as_tensor=True) - item = {"image_path": image_path, "gt_label": label_index} - - if self.task == TaskType.CLASSIFICATION: - item["image"] = self.augmentations(image) if self.augmentations else image - elif self.task == TaskType.SEGMENTATION: - # Create empty mask for: - # - Normal samples (label_index = 0) - # - Unknown samples (label_index = -1) - # Only use mask for anomalous samples (label_index = 1) - mask = ( - Mask(torch.zeros(image.shape[-2:])).to(torch.uint8) - if label_index in {LabelName.NORMAL, LabelName.UNKNOWN} - else read_mask(mask_path, as_tensor=True) - ) - item["image"], item["gt_mask"] = self.augmentations(image, mask) if self.augmentations else (image, mask) - else: - msg = f"Unknown task type: {self.task}" - raise ValueError(msg) + # Initialize mask as None + gt_mask = None + + # Process based on task type + if self.task == TaskType.SEGMENTATION: + if label_index == LabelName.NORMAL: + # Create zero mask for normal samples + gt_mask = Mask(torch.zeros(image.shape[-2:])).to(torch.uint8) + elif label_index == LabelName.ABNORMAL: + # Read mask for anomalous samples + 
gt_mask = read_mask(mask_path, as_tensor=True) + # For UNKNOWN, gt_mask remains None + + # Apply augmentations if available + if self.augmentations: + if self.task == TaskType.CLASSIFICATION: + image = self.augmentations(image) + elif self.task == TaskType.SEGMENTATION: + # For augmentations that require both image and mask: + # - Use a temporary zero mask for UNKNOWN samples + # - But preserve the final gt_mask as None for UNKNOWN + temp_mask = gt_mask if gt_mask is not None else Mask(torch.zeros(image.shape[-2:])).to(torch.uint8) + image, augmented_mask = self.augmentations(image, temp_mask) + # Only update gt_mask if it wasn't None before augmentations + if gt_mask is not None: + gt_mask = augmented_mask + + # Create gt_label tensor (None for UNKNOWN) + gt_label = None if label_index == LabelName.UNKNOWN else torch.tensor(label_index) + + # Return the dataset item return ImageItem( - image=item["image"], - gt_mask=item.get("gt_mask"), - gt_label=torch.tensor(label_index), # Convert to tensor to match type hints + image=image, + gt_mask=gt_mask, + gt_label=gt_label, image_path=image_path, mask_path=mask_path, ) diff --git a/src/anomalib/data/datasets/image/mvtecad.py b/src/anomalib/data/datasets/image/mvtecad.py index 192f8260e6..7f303716c9 100644 --- a/src/anomalib/data/datasets/image/mvtecad.py +++ b/src/anomalib/data/datasets/image/mvtecad.py @@ -190,7 +190,7 @@ def make_mvtec_ad_dataset( ) # assign mask paths to anomalous test images - samples["mask_path"] = "" + samples["mask_path"] = None samples.loc[ (samples.split == "test") & (samples.label_index == LabelName.ABNORMAL), "mask_path", From b87baa70ddd061da3e519bb120d82e550ccf6a2c Mon Sep 17 00:00:00 2001 From: Dick Ameln Date: Thu, 3 Apr 2025 15:20:03 +0200 Subject: [PATCH 18/19] add mvtec 2 download info --- .../data/datamodules/image/mvtecad2.py | 41 +++++++++++++++++-- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/src/anomalib/data/datamodules/image/mvtecad2.py 
b/src/anomalib/data/datamodules/image/mvtecad2.py index ba7934207a..9f8ba5ac1f 100644 --- a/src/anomalib/data/datamodules/image/mvtecad2.py +++ b/src/anomalib/data/datamodules/image/mvtecad2.py @@ -30,6 +30,7 @@ # Copyright (C) 2025 Intel Corporation # SPDX-License-Identifier: Apache-2.0 +import logging from pathlib import Path from lightning.pytorch.utilities.types import EVAL_DATALOADERS @@ -39,7 +40,17 @@ from anomalib.data.datamodules.base.image import AnomalibDataModule from anomalib.data.datasets.image import MVTecAD2Dataset from anomalib.data.datasets.image.mvtecad2 import TestType -from anomalib.data.utils import Split +from anomalib.data.utils import DownloadInfo, Split, download_and_extract + +logger = logging.getLogger(__name__) + +# Download information for MVTec AD 2 dataset +DOWNLOAD_INFO = DownloadInfo( + name="mvtecad2", + url="https://www.mydrive.ch/shares/121573/7f68fe2c4f7c2ceaa08f463aaeb2f414/download/" + "466712769-1743422799/mvtec_ad_2.tar.gz", + hashsum="c0ded99ef32bfc8e352d52beb44515e5b292b8598cb963aadfa91ca0763505e4", +) class MVTecAD2(AnomalibDataModule): @@ -129,11 +140,33 @@ def __init__( self.test_type = TestType(test_type) if isinstance(test_type, str) else test_type def prepare_data(self) -> None: - """Prepare the dataset. + """Download the dataset if not available. + + This method checks if the specified dataset is available in the file + system. If not, it downloads and extracts the dataset into the + appropriate directory. + + Example: + Assume the dataset is not available on the file system:: + + >>> datamodule = MVTecAD2( + ... root="./datasets/MVTecAD2", + ... category="can" + ... ) + >>> datamodule.prepare_data() - MVTec AD 2 dataset needs to be downloaded manually from MVTec AD 2 website. + Directory structure after download:: + + datasets/ + └── MVTecAD2/ + ├── can/ + ├── fabric/ + └── ... """ - # NOTE: For now, users need to manually download the dataset.
+ if (self.root / self.category).is_dir(): + logger.info("Found the dataset.") + else: + download_and_extract(self.root, DOWNLOAD_INFO) def _setup(self, _stage: str | None = None) -> None: """Set up the datasets and perform train/validation/test split. From 6fd630be30113503cc6c80b2cd59ff8997aa88ea Mon Sep 17 00:00:00 2001 From: Samet Akcay Date: Fri, 4 Apr 2025 16:09:42 +0100 Subject: [PATCH 19/19] Update init Signed-off-by: Samet Akcay --- src/anomalib/data/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/anomalib/data/__init__.py b/src/anomalib/data/__init__.py index 83e40b4b8f..d43103c162 100644 --- a/src/anomalib/data/__init__.py +++ b/src/anomalib/data/__init__.py @@ -174,6 +174,7 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule "Kolektor", "MVTec", # Include MVTec for backward compatibility "MVTecAD", + "MVTecAD2", "MVTecLOCO", "RealIAD", "VAD", @@ -190,7 +191,6 @@ def get_datamodule(config: DictConfig | ListConfig | dict) -> AnomalibDataModule "FolderDataset", "KolektorDataset", "MVTecADDataset", - "MVTecAD2", "MVTecLOCODataset", "VADDataset", "VisaDataset",