Commit 7436c0a

Update code library
1 parent 938bf64 commit 7436c0a

46 files changed: +4005 -388 lines changed

deepcore/__init__.py

+1
# __init__.py

deepcore/datasets/__init__.py

+8
from .cifar10 import *
from .cifar100 import *
from .fashionmnist import *
from .imagenet import *
from .mnist import *
from .qmnist import *
from .svhn import *
from .tinyimagenet import *

deepcore/datasets/cifar10.py

+18
from torchvision import datasets, transforms
from torch import tensor, long


def CIFAR10(data_path):
    channel = 3
    im_size = (32, 32)
    num_classes = 10
    mean = [0.4914, 0.4822, 0.4465]
    std = [0.2470, 0.2435, 0.2616]

    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    dst_train = datasets.CIFAR10(data_path, train=True, download=True, transform=transform)
    dst_test = datasets.CIFAR10(data_path, train=False, download=True, transform=transform)
    class_names = dst_train.classes
    dst_train.targets = tensor(dst_train.targets, dtype=long)
    dst_test.targets = tensor(dst_test.targets, dtype=long)
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test
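
Every loader in deepcore/datasets returns the same tuple of metadata plus train/test splits. A minimal usage sketch (the ./data directory and the DataLoader settings are illustrative, not part of this commit):

from torch.utils.data import DataLoader
from deepcore.datasets import CIFAR10

# Unpack the common return signature shared by all dataset loaders.
channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test = CIFAR10("./data")

# The returned objects are ordinary torchvision datasets.
train_loader = DataLoader(dst_train, batch_size=128, shuffle=True, num_workers=2)
images, targets = next(iter(train_loader))
print(images.shape)   # torch.Size([128, 3, 32, 32])
print(targets.shape)  # torch.Size([128])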

deepcore/datasets/cifar100.py

+17
from torchvision import datasets, transforms
from torch import tensor, long


def CIFAR100(data_path):
    channel = 3
    im_size = (32, 32)
    num_classes = 100
    mean = [0.5071, 0.4865, 0.4409]
    std = [0.2673, 0.2564, 0.2762]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    dst_train = datasets.CIFAR100(data_path, train=True, download=True, transform=transform)
    dst_test = datasets.CIFAR100(data_path, train=False, download=True, transform=transform)
    class_names = dst_train.classes
    dst_train.targets = tensor(dst_train.targets, dtype=long)
    dst_test.targets = tensor(dst_test.targets, dtype=long)
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test

deepcore/datasets/fashionmnist.py

+14
from torchvision import datasets, transforms


def FashionMNIST(data_path):
    channel = 1
    im_size = (28, 28)
    num_classes = 10
    mean = [0.2861]
    std = [0.3530]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    dst_train = datasets.FashionMNIST(data_path, train=True, download=True, transform=transform)
    dst_test = datasets.FashionMNIST(data_path, train=False, download=True, transform=transform)
    class_names = dst_train.classes
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test

deepcore/datasets/imagenet.py

+27
from torchvision import datasets, transforms
from torch import tensor, long


def ImageNet(data_path):
    channel = 3
    im_size = (224, 224)
    num_classes = 1000
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    normalize = transforms.Normalize(mean, std)
    dst_train = datasets.ImageNet(data_path, split="train", transform=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))
    dst_test = datasets.ImageNet(data_path, split="val", transform=transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        normalize,
    ]))
    class_names = dst_train.classes
    dst_train.targets = tensor(dst_train.targets, dtype=long)
    dst_test.targets = tensor(dst_test.targets, dtype=long)
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test

deepcore/datasets/mnist.py

+25
from torchvision import datasets, transforms
import numpy as np


def MNIST(data_path, permuted=False, permutation_seed=None):
    channel = 1
    im_size = (28, 28)
    num_classes = 10
    mean = [0.1307]
    std = [0.3081]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    if permuted:
        np.random.seed(permutation_seed)
        pixel_permutation = np.random.permutation(28 * 28)
        transform = transforms.Compose(
            [transform, transforms.Lambda(lambda x: x.view(-1, 1)[pixel_permutation].view(1, 28, 28))])

    dst_train = datasets.MNIST(data_path, train=True, download=True, transform=transform)
    dst_test = datasets.MNIST(data_path, train=False, download=True, transform=transform)
    class_names = [str(c) for c in range(num_classes)]
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test


def permutedMNIST(data_path, permutation_seed=None):
    return MNIST(data_path, True, permutation_seed)
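
The permuted variant applies a fixed pixel shuffle after normalization, seeded by permutation_seed. A small sketch of the same transform applied to a dummy tensor outside the loader (the names below are illustrative):

import numpy as np
import torch

# Reproduce the permutation step on a dummy normalized image of shape (1, 28, 28).
np.random.seed(0)  # plays the role of permutation_seed above
pixel_permutation = np.random.permutation(28 * 28)

x = torch.randn(1, 28, 28)
x_permuted = x.view(-1, 1)[pixel_permutation].view(1, 28, 28)

# The shuffle is a bijection on pixel positions, so the multiset of values is unchanged.
assert torch.allclose(x.flatten().sort().values, x_permuted.flatten().sort().values)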

deepcore/datasets/qmnist.py

+18
from torchvision import datasets, transforms


def QMNIST(data_path):
    channel = 1
    im_size = (28, 28)
    num_classes = 10
    mean = [0.1308]
    std = [0.3088]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    dst_train = datasets.QMNIST(data_path, train=True, download=True, transform=transform)
    dst_test = datasets.QMNIST(data_path, train=False, download=True, transform=transform)
    class_names = [str(c) for c in range(num_classes)]
    dst_train.targets = dst_train.targets[:, 0]
    dst_test.targets = dst_test.targets[:, 0]
    dst_train.compat = False
    dst_test.compat = False
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test

deepcore/datasets/svhn.py

+14
from torchvision import datasets, transforms


def SVHN(data_path):
    channel = 3
    im_size = (32, 32)
    num_classes = 10
    mean = [0.4377, 0.4438, 0.4728]
    std = [0.1980, 0.2010, 0.1970]
    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    dst_train = datasets.SVHN(data_path, split='train', download=True, transform=transform)
    dst_test = datasets.SVHN(data_path, split='test', download=True, transform=transform)
    class_names = [str(c) for c in range(num_classes)]
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test

deepcore/datasets/tinyimagenet.py

+35
from torchvision import datasets, transforms
import os
import requests
import zipfile


def TinyImageNet(data_path, downsize=True):
    if not os.path.exists(os.path.join(data_path, "tiny-imagenet-200")):
        url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"  # 248MB
        print("Downloading Tiny-ImageNet")
        r = requests.get(url, stream=True)
        with open(os.path.join(data_path, "tiny-imagenet-200.zip"), "wb") as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)

        print("Unzipping Tiny-ImageNet")
        with zipfile.ZipFile(os.path.join(data_path, "tiny-imagenet-200.zip")) as zf:
            zf.extractall(path=data_path)

    channel = 3
    im_size = (32, 32) if downsize else (64, 64)
    num_classes = 200
    mean = (0.4802, 0.4481, 0.3975)
    std = (0.2770, 0.2691, 0.2821)

    transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize(mean=mean, std=std)])
    if downsize:
        transform = transforms.Compose([transforms.Resize(32), transform])

    dst_train = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/train'), transform=transform)
    dst_test = datasets.ImageFolder(root=os.path.join(data_path, 'tiny-imagenet-200/test'), transform=transform)

    class_names = dst_train.classes
    return channel, im_size, num_classes, class_names, mean, std, dst_train, dst_test

deepcore/methods/__init__.py

+17
from .cal import *
from .contextualdiversity import *
from .coresetmethod import *
from .craig import *
from .deepfool import *
from .earlytrain import *
from .forgetting import *
from .full import *
from .glister import *
from .grand import *
from .gradmatch import *
from .herding import *
from .kcentergreedy import *
from .submodular import *
from .uncertainty import *
from .uniform import *

deepcore/methods/cal.py

+140
from .earlytrain import EarlyTrain
from .methods_utils.euclidean import euclidean_dist_pair_np
from .methods_utils.cossim import cossim_pair_np
import numpy as np
import torch
from .. import nets
from copy import deepcopy
from torchvision import transforms


class Cal(EarlyTrain):
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200, specific_model=None,
                 balance=True, metric="euclidean", neighbors: int = 10, pretrain_model: str = "ResNet18", **kwargs):
        super().__init__(dst_train, args, fraction, random_seed, epochs, specific_model, **kwargs)

        self.balance = balance

        assert neighbors > 0 and neighbors < 100
        self.neighbors = neighbors

        if metric == "euclidean":
            self.metric = euclidean_dist_pair_np
        elif metric == "cossim":
            self.metric = lambda a, b: -1. * cossim_pair_np(a, b)
        elif callable(metric):
            self.metric = metric
        else:
            self.metric = euclidean_dist_pair_np

        self.pretrain_model = pretrain_model

    def num_classes_mismatch(self):
        raise ValueError("num_classes of pretrain dataset does not match that of the training dataset.")

    def while_update(self, outputs, loss, targets, epoch, batch_idx, batch_size):
        if batch_idx % self.args.print_freq == 0:
            print('| Epoch [%3d/%3d] Iter[%3d/%3d]\t\tLoss: %.4f' % (
                epoch, self.epochs, batch_idx + 1, (self.n_pretrain_size // batch_size) + 1, loss.item()))

    def find_knn(self):
        """
        Find k-nearest-neighbor data points with the pretrained embedding model.
        :return: knn matrix
        """

        # Initialize pretrained model
        model = nets.__dict__[self.pretrain_model](channel=self.args.channel, num_classes=self.args.num_classes,
                                                   im_size=(224, 224), record_embedding=True, no_grad=True,
                                                   pretrained=True).to(self.args.device)
        model.eval()

        # Resize dst_train to 224*224
        if self.args.im_size[0] != 224 or self.args.im_size[1] != 224:
            dst_train = deepcopy(self.dst_train)
            dst_train.transform = transforms.Compose([dst_train.transform, transforms.Resize(224)])
        else:
            dst_train = self.dst_train

        # Calculate the distance matrix and return knn results
        if self.balance:
            knn = []
            for c in range(self.args.num_classes):
                class_index = np.arange(self.n_train)[self.dst_train.targets == c]

                # Start recording embedding vectors
                embeddings = []
                batch_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(dst_train, class_index),
                                                           batch_size=self.args.selection_batch,
                                                           num_workers=self.args.workers)
                batch_num = len(batch_loader)
                for i, (aa, _) in enumerate(batch_loader):
                    if i % self.args.print_freq == 0:
                        print("| Calculating embeddings for batch [%3d/%3d]" % (i + 1, batch_num))
                    model(aa.to(self.args.device))
                    embeddings.append(model.embedding_recorder.embedding.flatten(1).cpu().numpy())

                embeddings = np.concatenate(embeddings, axis=0)

                knn.append(np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)])
            return knn
        else:
            # Start recording embedding vectors
            embeddings = []
            batch_loader = torch.utils.data.DataLoader(dst_train, batch_size=self.args.selection_batch,
                                                       num_workers=self.args.workers)
            batch_num = len(batch_loader)

            for i, (aa, _) in enumerate(batch_loader):
                if i % self.args.print_freq == 0:
                    print("| Calculating embeddings for batch [%3d/%3d]" % (i + 1, batch_num))
                model(aa.to(self.args.device))
                embeddings.append(model.embedding_recorder.embedding.flatten(1).cpu().numpy())
            embeddings = np.concatenate(embeddings, axis=0)

            return np.argsort(self.metric(embeddings), axis=1)[:, 1:(self.neighbors + 1)]

    def calc_kl(self, knn, index=None):
        self.model.eval()
        self.model.no_grad = True
        sample_num = self.n_train if index is None else len(index)
        probs = np.zeros([sample_num, self.args.num_classes])

        batch_loader = torch.utils.data.DataLoader(
            self.dst_train if index is None else torch.utils.data.Subset(self.dst_train, index),
            batch_size=self.args.selection_batch, num_workers=self.args.workers)
        batch_num = len(batch_loader)

        for i, (inputs, _) in enumerate(batch_loader):
            probs[i * self.args.selection_batch:(i + 1) * self.args.selection_batch] = torch.nn.functional.softmax(
                self.model(inputs.to(self.args.device)), dim=1).detach().cpu()

        s = np.zeros(sample_num)
        for i in range(0, sample_num, self.args.selection_batch):
            if i % self.args.print_freq == 0:
                print("| Calculating KL-divergence for batch [%3d/%3d]" % (
                    i // self.args.selection_batch + 1, batch_num))
            aa = np.expand_dims(probs[i:(i + self.args.selection_batch)], 1).repeat(self.neighbors, 1)
            bb = probs[knn[i:(i + self.args.selection_batch)], :]
            s[i:(i + self.args.selection_batch)] = np.mean(
                np.sum(0.5 * aa * np.log(aa / bb) + 0.5 * bb * np.log(bb / aa), axis=2), axis=1)
        self.model.no_grad = False
        return s

    def finish_run(self):
        scores = []
        if self.balance:
            selection_result = np.array([], dtype=np.int32)
            for c, knn in zip(range(self.args.num_classes), self.knn):
                class_index = np.arange(self.n_train)[self.dst_train.targets == c]
                scores.append(self.calc_kl(knn, class_index))
                selection_result = np.append(selection_result, class_index[np.argsort(
                    scores[-1])[::1][:round(self.fraction * len(class_index))]])
        else:
            selection_result = np.argsort(self.calc_kl(self.knn))[::1][:self.coreset_size]
        return {"indices": selection_result, "scores": scores}

    def select(self, **kwargs):
        self.knn = self.find_knn()
        selection_result = self.run()
        return selection_result
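
The score computed in calc_kl is the mean symmetric KL (Jeffreys) divergence between a sample's softmax output and those of its k nearest neighbors. A small self-contained sketch of the same quantity (probs, knn, and the function name below are illustrative, not part of this commit):

import numpy as np

def jeffreys_scores(probs, knn):
    # probs: (n, num_classes) softmax outputs; knn: (n, k) neighbor indices.
    p = np.expand_dims(probs, 1)   # (n, 1, num_classes)
    q = probs[knn]                 # (n, k, num_classes)
    # Symmetric KL between each sample and each of its neighbors, then averaged over neighbors.
    sym_kl = 0.5 * np.sum(p * np.log(p / q) + q * np.log(q / p), axis=2)  # (n, k)
    return sym_kl.mean(axis=1)     # higher score = more disagreement with neighbors

probs = np.array([[0.9, 0.1], [0.8, 0.2], [0.2, 0.8]])
knn = np.array([[1, 2], [0, 2], [0, 1]])
print(jeffreys_scores(probs, knn))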

deepcore/methods/contextualdiversity.py

+33

from .kcentergreedy import kCenterGreedy
import torch


# Acknowledgement to:
# https://github.com/sharat29ag/CDAL


class ContextualDiversity(kCenterGreedy):
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, epochs=200,
                 specific_model=None, balance=True, already_selected=[], torchvision_pretrain: bool = False, **kwargs):
        super(ContextualDiversity, self).__init__(dst_train, args, fraction, random_seed, epochs=epochs,
                                                  specific_model=specific_model, balance=balance,
                                                  already_selected=already_selected,
                                                  torchvision_pretrain=torchvision_pretrain, **kwargs)
        self.metric = self._metric

    def _metric(self, a_output, b_output):
        with torch.no_grad():
            # Overload self.metric function for the kCenterGreedy algorithm
            aa = a_output.view(a_output.shape[0], 1, a_output.shape[1]).repeat(1, b_output.shape[0], 1)
            bb = b_output.view(1, b_output.shape[0], b_output.shape[1]).repeat(a_output.shape[0], 1, 1)
            return torch.sum(0.5 * aa * torch.log(aa / bb) + 0.5 * bb * torch.log(bb / aa), dim=2)

    def construct_matrix(self, index=None):
        self.model.eval()
        self.model.no_grad = True
        sample_num = self.n_train if index is None else len(index)
        matrix = torch.zeros([sample_num, self.args.num_classes], requires_grad=False).to(self.args.device)
        batch_loader = torch.utils.data.DataLoader(self.dst_train if index is None else
                                                   torch.utils.data.Subset(self.dst_train, index),
                                                   batch_size=self.args.selection_batch,
                                                   num_workers=self.args.workers)
        for i, (inputs, _) in enumerate(batch_loader):
            matrix[i * self.args.selection_batch:min((i + 1) * self.args.selection_batch, sample_num)] = \
                torch.nn.functional.softmax(self.model(inputs.to(self.args.device)), dim=1)
        self.model.no_grad = False
        return matrix

deepcore/methods/coresetmethod.py

+17
class CoresetMethod(object):
    def __init__(self, dst_train, args, fraction=0.5, random_seed=None, **kwargs):
        if fraction <= 0.0 or fraction > 1.0:
            raise ValueError("Illegal Coreset Size.")
        self.dst_train = dst_train
        self.num_classes = len(dst_train.classes)
        self.fraction = fraction
        self.random_seed = random_seed
        self.index = []
        self.args = args

        self.n_train = len(dst_train)
        self.coreset_size = round(self.n_train * fraction)

    def select(self, **kwargs):
        return
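
CoresetMethod is the base class that the selection methods above subclass; select() is the entry point and is expected to return the chosen indices. A hypothetical minimal subclass for illustration (not the repository's actual uniform sampler) might look like:

import numpy as np

class RandomSubset(CoresetMethod):
    # Illustrative example: pick coreset_size indices uniformly at random.
    def select(self, **kwargs):
        rng = np.random.default_rng(self.random_seed)
        indices = rng.choice(self.n_train, size=self.coreset_size, replace=False)
        return {"indices": indices}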
