
Dev #20

Merged · 36 commits · Jun 19, 2024

Changes from 1 commit

Commits (36), all authored by edadaltocg:
f60ffa9  solve minor issues (Jun 1, 2023)
99a0332  implement Beyond AUROC & Co. (Jun 7, 2023)
9e8c52d  small changes (Jul 3, 2023)
628de5c  improve aggregations (Jul 3, 2023)
9626764  fix typo (Jul 3, 2023)
3acf527  ninco ssb clean benchmark (Aug 21, 2023)
14c7541  update imagenet benchmarks (Aug 21, 2023)
8e83cdb  small changes (Aug 23, 2023)
3888942  p-value combining method (Sep 13, 2023)
40bda81  small changes (Sep 13, 2023)
89928e7  update doi (Sep 25, 2023)
14aa3b2  update some datasets to hf hub (Oct 31, 2023)
8e8a773  crop_pct bug (Nov 2, 2023)
f147ea8  Ensembling + SC (Dec 14, 2023)
40ec8d6  fix pipelines (Dec 14, 2023)
4887663  docs (Jan 2, 2024)
260596e  fix docstrings (Jan 2, 2024)
802ec63  remove requirements.txt (Jan 2, 2024)
251ed22  remove ba (Jan 2, 2024)
db10b31  remove ba (Jan 2, 2024)
fe9ea60  bump version (Jan 2, 2024)
0bb1123  remove reqs (Jan 2, 2024)
c1c04a3  bump actions (Jan 2, 2024)
79ee723  minor fix (Jan 2, 2024)
00c0dc1  minor fix (Jan 2, 2024)
17beb4a  update pypi workflow (Jan 2, 2024)
30b9861  fix typo (Jan 2, 2024)
e7a67c3  download imagenet models (Jan 15, 2024)
64c0e7b  Improvements to pipelines (Jun 19, 2024)
e815c58  Code + docs cleaning (Jun 19, 2024)
20ffb04  Combine and conquer score aggregation (Jun 19, 2024)
eb9a811  helpful scripts (Jun 19, 2024)
dac1ec3  Merge branch 'master' of https://github.com/edadaltocg/detectors into… (Jun 19, 2024)
362f697  remove requirements.txt (Jun 19, 2024)
46bba8a  Format (Jun 19, 2024)
f6229f1  Simplify tests (Jun 19, 2024)
Commit 14c754151082dc0dd5bf05f6b123120e790fa9e0 ("update imagenet benchmarks")
edadaltocg committed Aug 21, 2023
27 changes: 26 additions & 1 deletion src/detectors/data/__init__.py
@@ -2,13 +2,16 @@
 Datasets module.
 """
 import logging
+import os
 from enum import Enum
 from functools import partial
 from typing import Callable, List, Optional, Type

 from torch.utils.data import Dataset
 from torchvision.datasets import STL10, SVHN, ImageNet, OxfordIIITPet, StanfordCars

+from .ninco_ssb_clean import NINCO, NINCOFull, SSBEasy, SSBHard, TexturesClean
+
 from ..config import DATA_DIR, IMAGENET_ROOT
 from .cifar_wrapper import CIFAR10Wrapped, CIFAR100Wrapped
 from .cifarc import CIFAR10_C, CIFAR100_C
@@ -57,6 +60,11 @@
     "mos_inaturalist": MOSiNaturalist,
     "mos_places365": MOSPlaces365,
     "mos_sun": MOSSUN,
+    "ninco_full": NINCOFull,
+    "ninco": NINCO,
+    "ssb_hard": SSBHard,
+    "ssb_easy": SSBEasy,
+    "textures_clean": TexturesClean,
     "cifar10_lt": None,
     "cifar100_lt": None,
     "imagenet1k_lt": None,
@@ -130,7 +138,7 @@ def create_dataset(
             `imagenet_a`, `imagenet_r`, `imagenet_o`, `openimage_o`, `oxford_pets`,
             `oxford_flowers`, `cub200`, `imagenet1k_c`, `blobs`, `rademacher`,
             `wilds_iwildcam`, `wilds_fmow`, `wilds_camelyon17`, `wilds_rxrx1`,
-            `wilds_poverty`, `wilds_globalwheat`.
+            `wilds_poverty`, `wilds_globalwheat`, `ninco`.
         root (string): Root directory of dataset.
         split (string, optional): Depends on the selected dataset.
         transform (callable, optional): A function/transform that takes in an PIL image
@@ -155,6 +163,23 @@ def create_dataset(
         raise ValueError("Dataset name is not specified")


+def delete_dataset(dataset_name: str, root: str = DATA_DIR):
+    dataset_cls = datasets_registry[dataset_name]
+    try:
+        os.remove(os.path.join(root, dataset_cls.filename))
+    except FileNotFoundError:
+        print(f"File {dataset_cls.filename} not found")
+    except Exception as e:
+        print(e)
+
+    try:
+        os.remove(os.path.join(root, dataset_cls.base_folder))
+    except FileNotFoundError:
+        print(f"Folder {dataset_cls.base_folder} not found")
+    except Exception as e:
+        print(e)
+
+
 def get_dataset_cls(dataset_name: str) -> Type[Dataset]:
     """Return dataset class by name.

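The new `delete_dataset` helper is the inverse of `create_dataset`: it resolves the registered class and removes its cached archive and extracted folder from `root`. A minimal usage sketch (the `"cifar10"` key and the `split` value are assumptions for illustration, not taken from this diff):

```python
# Hypothetical usage; create_dataset/delete_dataset come from the diff above.
from detectors.data import create_dataset, delete_dataset

ds = create_dataset("cifar10", split="test")  # class resolved via datasets_registry
print(len(ds))

# Removes <root>/<filename>; the <base_folder> attempt will usually just print
# an OSError, since os.remove does not delete directories.
delete_dataset("cifar10")
```

One caveat: the second `try` block calls `os.remove` on a directory, which on most platforms raises `IsADirectoryError` rather than `FileNotFoundError`, so extracted folders end up reported but not deleted; `shutil.rmtree` would be needed for that.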
2 changes: 0 additions & 2 deletions src/detectors/data/imagenet.py
@@ -24,7 +24,6 @@ class ImageNetA(ImageFolder):
     tgz_md5 = "c3e55429088dc681f30d81f4726b6595"

     def __init__(self, root: str, split=None, transform: Optional[Callable] = None, download: bool = False, **kwargs):
-
         self.root = root

         if download:
@@ -175,7 +174,6 @@ def _get_corruption_group(corruption: str):


 def _imagenet_c_to_npz(root: str, split: str, intensity: int, dest_folder: str = "ImageNetCnpz") -> None:
-
     dataset = ImageNetC(root, split, intensity, download=True)
     assert len(dataset) == 50_000, "ImageNetC should have 50,000 images. Please check the dataset."
     image_example = dataset[0][0]
4 changes: 2 additions & 2 deletions src/detectors/data/openimage_o.py
@@ -42,7 +42,7 @@ def __init__(
         super().__init__(self.dataset_folder, transform=transform, **kwargs)

     def _check_integrity(self) -> bool:
-        # assert number of iumages in folder is equal to 17632
+        # assert number of images in folder is equal to 17632
         if not self._check_exists():
             return False

@@ -52,7 +52,7 @@ def _check_integrity(self) -> bool:
             # check if current path is a file
             if os.path.isfile(os.path.join(self.dataset_folder, self.base_folder, path)):
                 count += 1
-        return count >= 16_000
+        return count >= 10_000

     def _check_exists(self) -> bool:
         return os.path.exists(self.dataset_folder)
65 changes: 65 additions & 0 deletions src/detectors/data/utils.py
@@ -0,0 +1,65 @@
+import logging
+import os
+from typing import Callable, Optional
+
+import numpy as np
+from PIL import Image
+from torch.utils.data.dataset import Dataset
+from tqdm import tqdm
+
+_logger = logging.getLogger(__name__)
+
+
+def image_dataset_to_npz(DatasetCls, root: str, split: str, **kwargs) -> None:
+    dataset = DatasetCls(root, split, download=True, **kwargs)
+    dest_folder = DatasetCls.base_folder + "_npz"
+    image_example = dataset[0][0]
+    width, height = image_example.size
+    _logger.info("Image size: %d x %d", width, height)
+    x = np.ndarray(shape=(len(dataset), height, width, 3), dtype=np.uint8)
+    y = np.ndarray(shape=(len(dataset)), dtype=np.int32)
+    for i in tqdm(range(len(dataset))):
+        image, label = dataset[i]
+        x[i] = image
+        y[i] = label
+
+    os.makedirs(os.path.join(root, dest_folder), exist_ok=True)
+    np.savez(os.path.join(root, dest_folder, f"{split}.npz"), x=x, y=y)
+
+
+class DatasetNpz(Dataset):
+    def __init__(
+        self,
+        root: str,
+        base_folder_name: str,
+        split: str,
+        transform: Optional[Callable] = None,
+        download: bool = False,
+        **kwargs,
+    ) -> None:
+        super().__init__()
+        self.root = os.path.expanduser(root)
+        self.base_folder_name = base_folder_name
+        self.split = split
+        self.path = os.path.join(self.root, self.base_folder_name, f"{split}.npz")
+        self.transform = transform
+
+        data = np.load(self.path, mmap_mode="r")
+        self.images = data["x"]
+        self.labels = data["y"]
+
+    def __getitem__(self, index):
+        x = self.images[index]
+        x = Image.fromarray(x)
+
+        if self.transform:
+            x = self.transform(x)
+
+        y = self.labels[index]
+        return x, y
+
+    def __len__(self):
+        return len(self.images)
+
+    def _check_exists(self) -> bool:
+        return os.path.exists(self.path)
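Together, `image_dataset_to_npz` and `DatasetNpz` form a convert-once, load-fast pattern: the conversion pass decodes every image into a single uint8 array per split, and the wrapper serves PIL images straight back out of the `.npz`. A hypothetical round trip, assuming `CIFAR10Wrapped` follows the `(root, split, ..., download=...)` constructor convention the converter expects and inherits torchvision's `base_folder` attribute:

```python
from detectors.data.cifar_wrapper import CIFAR10Wrapped
from detectors.data.utils import DatasetNpz, image_dataset_to_npz

root = "/tmp/data"

# One-off conversion: writes /tmp/data/<base_folder>_npz/test.npz holding
# x with shape (N, H, W, 3) as uint8 and y with shape (N,) as int32. All
# images must share one size, since x is preallocated from the first example.
image_dataset_to_npz(CIFAR10Wrapped, root, split="test")

# Later runs skip image decoding entirely.
ds = DatasetNpz(root, CIFAR10Wrapped.base_folder + "_npz", split="test")
image, label = ds[0]  # PIL image via Image.fromarray, plus the int32 label
```

One caveat worth noting: `np.load` ignores `mmap_mode` for `.npz` archives, so the arrays are still read fully into memory on first access.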
1 change: 0 additions & 1 deletion src/detectors/data/wilds_ds.py
@@ -59,7 +59,6 @@
 def make_wilds_dataset(
     dataset_name, root, split="train", transform: Optional[Callable] = None, download=False, **kwargs
 ):
-
     dataset = wilds.get_dataset(dataset_name, root_dir=root, download=download)
     assert dataset is not None
     dataset = dataset.get_subset(split, transform=transform)
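`make_wilds_dataset` is a thin adapter around `wilds.get_dataset` plus `get_subset`; the change here only drops a stray blank line. A hypothetical call, assuming the `wilds` package is installed (dataset names are WILDS's own, e.g. `iwildcam`, which the detectors registry exposes as `wilds_iwildcam`):

```python
from detectors.data.wilds_ds import make_wilds_dataset

# download=True fetches the benchmark on first use; WILDS subsets yield
# (x, y, metadata) tuples rather than plain (x, y) pairs.
ds = make_wilds_dataset("iwildcam", root="data/", split="test", download=True)
x, y, metadata = ds[0]
```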