-
Notifications
You must be signed in to change notification settings - Fork 18
/
Copy pathfile_function.py
217 lines (182 loc) · 7.91 KB
/
file_function.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
# ************************************************************
# Author : Bumsoo Kim, 2017
# Github : https://github.com/meliketoy/cellnet.pytorch
#
# Korea University, Data-Mining Lab
# Deep Convolutional Network Preprocessing Implementation
#
# Module : 1_preprocessor
# Description : file_function.py
# The function codes for file management.
# ***********************************************************
import os
import cv2
import sys
import csv
import augmentation as aug
import config as cf
import numpy as np
from operator import div
# Print the path of every image file found under the directory.
def print_all_imgs(in_dir):
    """Print the path of every image file found under ``in_dir``.

    The tree is walked recursively with ``os.walk`` and a file is reported
    when ``is_image`` accepts its name.

    Args:
        in_dir: Root directory to scan.
    """
    for subdir, _dirs, files in os.walk(in_dir):
        for f in files:
            if is_image(f):
                # Same "<dir><sep><name>" form the rest of this module uses.
                print(subdir + os.sep + f)
# check if the given file is an image format
def is_image(f):
    """Return True when the file name ``f`` has a supported image extension.

    The comparison is case-insensitive, so names such as ``IMG_0001.JPG`` are
    accepted, and ``.jpeg`` is recognised alongside ``.png`` and ``.jpg``.

    Args:
        f: File name (or path) to test.

    Returns:
        bool: True for ``.png``, ``.jpg`` or ``.jpeg`` names, False otherwise.
    """
    return f.lower().endswith((".png", ".jpg", ".jpeg"))
# check if dir exists. If not, mkdir.
def check_and_mkdir(in_dir):
    """Create ``in_dir`` (including missing parents) when it does not exist.

    A "Creating ..." line is printed only when the path is absent. Nothing is
    done when the path already exists.

    Args:
        in_dir: Directory path to ensure.

    Raises:
        OSError: If the directory could not be created and still does not
            exist afterwards.
    """
    if os.path.exists(in_dir):
        return

    print("Creating "+in_dir+"...")
    try:
        os.makedirs(in_dir)
    except OSError:
        # Another process may have created the directory between the
        # existence check and makedirs; that is not an error for us.
        if not os.path.isdir(in_dir):
            raise
# read and print all the image sizes of the dir.
def read_all_imgs(in_dir):
    """Print the path and decoded array shape of every image under ``in_dir``.

    Each image is loaded with ``cv2.imread`` and reported as one formatted
    line: the path left-aligned in 100 columns, followed by the shape.
    Files that OpenCV cannot decode are reported as ``unreadable`` instead of
    aborting the scan.

    Args:
        in_dir: Root directory to scan recursively.
    """
    for subdir, _dirs, files in os.walk(in_dir):
        for f in files:
            if not is_image(f):
                continue
            file_path = subdir + os.sep + f
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None rather than raising when a file
                # cannot be read or decoded.
                print('{:<100} {:>10}'.format(file_path, "unreadable"))
                continue
            print('{:<100} {:>10}'.format(file_path, str(img.shape)))
# resize the imgs from in_dir, and save with exact same hierarchy in the out_dir
def resize_images(in_dir, out_dir, target_size):
    """Resize every image under ``in_dir`` to a square and save it.

    Each image is resized to ``target_size`` x ``target_size`` with bicubic
    interpolation and written under a directory named after the image's
    parent folder:

    * normally ``<out_dir>/<parent>/<file>``;
    * when the image path has 7 or more ``os.sep``-separated components,
      ``<cf.split_dir>/<grandparent>/<parent>/<file>`` instead.

    NOTE(review): the 7-component rule is a path-depth heuristic that depends
    on how ``in_dir`` is spelled (absolute vs. relative); presumably it is
    meant to detect an already train/val-split tree -- confirm with callers.

    The destination is decided per file; ``out_dir`` itself is never
    reassigned, so one deep file cannot redirect the files processed after it.

    Args:
        in_dir: Root directory to scan recursively.
        out_dir: Root directory for the resized copies (created if missing).
        target_size: Edge length, in pixels, of the square output.
    """
    check_and_mkdir(out_dir)  # sanity check for the target output directory

    for subdir, _dirs, files in os.walk(in_dir):
        for f in files:
            if not is_image(f):
                continue

            file_path = subdir + os.sep + f
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image " + file_path)
                continue

            resized_img = cv2.resize(
                img, (target_size, target_size), interpolation=cv2.INTER_CUBIC
            )

            # Split on the same separator that was used to build file_path.
            parts = file_path.split(os.sep)
            if len(parts) >= 7:
                class_dir = os.path.join(cf.split_dir, parts[-3], parts[-2])
            else:
                class_dir = out_dir + os.sep + parts[-2]
            check_and_mkdir(class_dir)  # sanity check for the target class directory

            file_name = class_dir + os.sep + parts[-1]
            print(file_name)
            cv2.imwrite(file_name, resized_img)
def resize_and_contrast(in_dir, out_dir, target_size):
    """Resize images to a square, stretch their contrast, and save them.

    Every image under ``in_dir`` is read with ``cv2.imread(path, 0)``
    (flag 0 selects grayscale loading in OpenCV), resized to
    ``target_size`` x ``target_size`` with bicubic interpolation, min-max
    normalised to the full 8-bit range 0..255, and written to
    ``<out_dir>/<parent folder>/<file>``.

    Args:
        in_dir: Root directory to scan recursively.
        out_dir: Root directory for the processed copies (created if missing).
        target_size: Edge length, in pixels, of the square output.
    """
    check_and_mkdir(out_dir)

    for subdir, _dirs, files in os.walk(in_dir):
        for f in files:
            if not is_image(f):
                continue

            file_path = subdir + os.sep + f
            img = cv2.imread(file_path, 0)
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image " + file_path)
                continue

            resized_img = cv2.resize(
                img, (target_size, target_size), interpolation=cv2.INTER_CUBIC
            )

            # Split on the same separator that was used to build file_path.
            parts = file_path.split(os.sep)
            class_dir = out_dir + os.sep + parts[-2]
            check_and_mkdir(class_dir)

            file_name = class_dir + os.sep + parts[-1]
            print(file_name)

            # cv2.normalize takes the destination array as its second
            # positional argument; passing None lets OpenCV allocate it.
            # Normalising directly to 0..255 as 8-bit avoids scaling a
            # [0, 1] float image by 256, which overshoots the 8-bit maximum.
            norm_image = cv2.normalize(
                resized_img,
                None,
                alpha=0,
                beta=255,
                norm_type=cv2.NORM_MINMAX,
                dtype=cv2.CV_8U,
            )
            cv2.imwrite(file_name, norm_image)
# count the direct one-step sub directories (which will represent the class name)
def class_info(in_dir, mode):
    """Report the immediate sub-directories of ``in_dir`` (the class names).

    Only the first level of ``os.walk(in_dir)`` is inspected. A missing or
    unreadable ``in_dir`` yields no classes.

    Args:
        in_dir: Directory whose direct sub-directories represent classes.
        mode: ``"list"`` to get the sub-directory names, ``"len"`` to get
            how many there are.

    Returns:
        list or int: The class names for ``"list"``, their count for ``"len"``.

    Raises:
        ValueError: If ``mode`` is neither ``"len"`` nor ``"list"``.
    """
    # The first tuple produced by os.walk describes in_dir itself; its
    # directory list is exactly the set of class folders. The default covers
    # the case where os.walk yields nothing at all.
    _root, class_lst, _files = next(os.walk(in_dir), (in_dir, [], []))

    if mode == "len":
        return len(class_lst)
    if mode == "list":
        return class_lst
    raise ValueError("mode should be either 'len' or 'list', got %r" % (mode,))
# Count the images contained in each class directory.
def count_each_class(in_dir):
    """Count and print the number of images in each class folder of ``in_dir``.

    Classes are the direct sub-directories reported by
    ``class_info(in_dir, "list")``. Each class folder is searched recursively
    and one formatted line (class name, count) is printed per class.

    Args:
        in_dir: Directory whose direct sub-directories are the classes.

    Returns:
        list: Image counts, in the same order as the class list.
    """
    cnt_lst = []
    for class_dir in class_info(in_dir, "list"):
        class_count = sum(
            1
            for _subdir, _dirs, files in os.walk(in_dir + os.sep + class_dir)
            for f in files
            if is_image(f)
        )
        print("\t| {:<15} {:>5}".format(class_dir, class_count))
        cnt_lst.append(class_count)
    return cnt_lst
# return whether the current phase is 'train' or 'validation'
def return_phase(num, val_num):
    """Return the phase folder prefix for the ``num``-th image of a class.

    Indices below ``val_num`` go to validation; all others go to training.

    Args:
        num: Zero-based index of the image within its class.
        val_num: Number of images reserved for validation.

    Returns:
        str: ``"val"`` or ``"train"`` followed by ``os.sep``.
    """
    phase = "val" if num < val_num else "train"
    return phase + os.sep
# create a train-val sub-organized directory from the original class directory
def create_train_val_split(in_dir, split_dir, split=cf.split):
    """Copy a class-per-folder image tree into a train/val layout.

    ``<split_dir>/train/<class>`` and ``<split_dir>/val/<class>`` are created
    for every direct sub-directory of ``in_dir``. Then, for each directory
    visited under ``in_dir``, the first ``val_num`` images are written to the
    ``val`` side and the rest to the ``train`` side, under a folder named
    after the directory that contained them.

    ``val_num`` is ``int(<images in the folder> * cf.val_ratio)`` when
    ``split == 'ratio'`` and ``cf.val_num`` otherwise. Only image files count
    towards the quota, so stray non-image files no longer inflate the
    validation share.

    NOTE(review): destination folders are only pre-created for the direct
    class folders of ``in_dir``; images sitting directly in ``in_dir`` or in
    deeper nested folders target a folder this function does not create.

    Args:
        in_dir: Source directory with one sub-directory per class.
        split_dir: Output root of the train/val tree (created if missing).
        split: ``'ratio'`` to size the validation set per folder by
            ``cf.val_ratio``; any other value uses the fixed ``cf.val_num``.

    Returns:
        str: ``split_dir``, unchanged.
    """
    print("Saving train-val splitted images into %s" %(split_dir))
    check_and_mkdir(split_dir)

    # The output layout is "<split_dir>/<phase>/<class>".
    class_lst = class_info(in_dir, "list")
    for phase in ["train", "val"]:
        check_and_mkdir(split_dir + os.sep + phase)
        for cls in class_lst:
            check_and_mkdir(split_dir + os.sep + phase + os.sep + cls)

    for subdir, _dirs, files in os.walk(in_dir):
        images = [f for f in files if is_image(f)]
        val_num = int(len(images) * cf.val_ratio) if split == 'ratio' else cf.val_num
        # Split on the same separator that os.walk uses to build subdir.
        cls_name = subdir.split(os.sep)[-1]

        cnt = 0
        for f in images:
            file_path = subdir + os.sep + f
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode; skip
                # them without consuming a slot of the validation quota.
                print("Skipping unreadable image " + file_path)
                continue
            cv2.imwrite(
                split_dir + os.sep + return_phase(cnt, val_num) + cls_name + os.sep + f,
                img,
            )
            cnt += 1

    return split_dir
# get train-val information
def get_split_info(split_dir):
    """Print per-class image counts for the train and val sets.

    Expects ``split_dir`` to already contain ``train`` and ``val`` folders,
    i.e. it should be called after the split has been created.

    Args:
        split_dir: Root of the train/val tree.

    Returns:
        str: ``split_dir``, unchanged.
    """
    for phase in ("train", "val"):
        print("| %s set : " %phase)
        count_each_class(split_dir + os.sep + phase)
    return split_dir
# train data augmentation
def aug_train(split_dir, mode):
    """Write three rotated copies of every training image next to it.

    For each image ``<name><ext>`` under ``<split_dir>/train`` the files
    ``<name>_aug_90<ext>``, ``<name>_aug_180<ext>`` and ``<name>_aug_270<ext>``
    are written in the same folder. The pixels come from ``aug.rotation``:
    called as ``rotation(img, 0, 'random')`` in ``'random'`` mode and as
    ``rotation(img, i, 'strict')`` with ``i`` in 1..3 in ``'strict'`` mode.
    The file-name suffix is ``i * 90`` in both modes.

    The mode is validated before any file is touched, so an invalid mode is
    reported even when the training folder holds no images.

    Args:
        split_dir: Root of the train/val tree.
        mode: ``'random'`` or ``'strict'``.

    Raises:
        SystemExit: With status 1 when ``mode`` is not a supported value.
    """
    if mode not in ('random', 'strict'):
        print("The mode should be either random | strict")
        sys.exit(1)

    train_dir = split_dir + os.sep + "train"
    for subdir, _dirs, files in os.walk(train_dir):
        for f in files:
            if not is_image(f):
                continue

            file_path = subdir + os.sep + f
            print(file_path)
            name, ext = os.path.splitext(f)
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image " + file_path)
                continue

            for i in range(1, 4):
                rot_path = subdir + os.sep + name + "_aug_" + str(i * 90) + ext
                if mode == 'random':
                    rotated = aug.rotation(img, 0, 'random')
                else:
                    rotated = aug.rotation(img, i, 'strict')
                cv2.imwrite(rot_path, rotated)
def train_mean(split_dir):
    """Compute the per-channel mean of the training images, scaled to [0, 1].

    Every image under ``<split_dir>/train`` is loaded with ``cv2.imread`` and
    the mean of each of its three channels is accumulated. The result is the
    average of those per-image means divided by 255. Channels are indexed as
    stored in the array returned by ``cv2.imread`` (OpenCV documents that
    order as BGR).

    A real list is returned on both Python 2 and Python 3; the computation
    does not rely on ``operator.div`` or on ``map`` returning a list.

    Args:
        split_dir: Root of the train/val tree.

    Returns:
        list: Three floats, one per channel.

    Raises:
        ZeroDivisionError: If no readable training image was found.
    """
    train_dir = split_dir + os.sep + "train"
    train_img_num = 0
    train_mean_lst = [0.0, 0.0, 0.0]

    for subdir, _dirs, files in os.walk(train_dir):
        for f in files:
            if not is_image(f):
                continue
            file_path = subdir + os.sep + f
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image " + file_path)
                continue
            train_img_num += 1
            for channel in range(3):
                train_mean_lst[channel] += img[:, :, channel].mean()

    if train_img_num == 0:
        raise ZeroDivisionError("No training images found under " + train_dir)

    return [total / train_img_num / 255.0 for total in train_mean_lst]
def train_std(split_dir, train_mean):
    """Compute a per-channel standard deviation of the training images.

    For every image under ``<split_dir>/train`` the variance of each of its
    three channels is accumulated. The result is the square root of the
    average per-image variance, divided by 255.

    NOTE(review): this is the root of the mean *per-image* variance, not the
    variance about the dataset-wide mean; ``train_mean`` is accepted for
    interface compatibility but is not used in the computation.

    A real list is returned on both Python 2 and Python 3; the computation
    does not rely on ``operator.div`` or on ``map`` returning a list.

    Args:
        split_dir: Root of the train/val tree.
        train_mean: Unused; kept so existing callers keep working.

    Returns:
        list: Three floats, one per channel.

    Raises:
        ZeroDivisionError: If no readable training image was found.
    """
    train_dir = split_dir + os.sep + "train"
    train_img_num = 0
    train_std_lst = [0.0, 0.0, 0.0]

    for subdir, _dirs, files in os.walk(train_dir):
        for f in files:
            if not is_image(f):
                continue
            file_path = subdir + os.sep + f
            img = cv2.imread(file_path)
            if img is None:
                # cv2.imread returns None for files it cannot decode.
                print("Skipping unreadable image " + file_path)
                continue
            train_img_num += 1
            for channel in range(3):
                train_std_lst[channel] += img[:, :, channel].var()  # per image var()

    if train_img_num == 0:
        raise ZeroDivisionError("No training images found under " + train_dir)

    std_lst = np.sqrt([total / train_img_num for total in train_std_lst])
    return [std / 255.0 for std in std_lst]