1_preprocessor/file_function.py

# ************************************************************
# Author : Bumsoo Kim, 2017
# Github : https://github.com/meliketoy/cellnet.pytorch
#
# Korea University, Data-Mining Lab
# Deep Convolutional Network Preprocessing Implementation
#
# Module : 1_preprocessor
# Description : file_function.py
# The function codes for file management.
# ***********************************************************

import os
import cv2
import sys
import csv
import augmentation as aug
import config as cf
import numpy as np
from operator import div

# print all the name of images in the directory.
def print_all_imgs(in_dir):
    for subdir, dirs, files in os.walk(in_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                print(file_path)

# check if the given file is an image format
def is_image(f):
    return f.endswith(".png") or f.endswith(".jpg")

# check if dir exists. If not, mkdir.
def check_and_mkdir(in_dir):
    if not os.path.exists(in_dir):
        print("Creating "+in_dir+"...")
        os.makedirs(in_dir)

# read and print all the image sizes of the dir.
def read_all_imgs(in_dir):
    for subdir, dirs, files in os.walk(in_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                img = cv2.imread(file_path)
                print('{:<100} {:>10}'.format(file_path, str(img.shape)))
                # print(file_path + ",img size = "+str(img.shape))

# resize the imgs from in_dir, and save with exact same hierarchy in the out_dir
def resize_images(in_dir, out_dir, target_size):
    check_and_mkdir(out_dir) # sanity check for the target output directory

    for subdir, dirs, files in os.walk(in_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                img = cv2.imread(file_path)
                resized_img = cv2.resize(img, (target_size, target_size), interpolation = cv2.INTER_CUBIC)
                class_dir = out_dir + os.sep + file_path.split("/")[-2]
                if len(file_path.split("/")) >= 7:
                    out_dir = cf.split_dir
                    class_dir = os.path.join(out_dir, file_path.split("/")[-3], file_path.split("/")[-2])
                check_and_mkdir(class_dir) # sanity check for the target class directory

                file_name = class_dir + os.sep + file_path.split("/")[-1]
                print(file_name)
                cv2.imwrite(file_name, resized_img)

def resize_and_contrast(in_dir, out_dir, target_size):
    check_and_mkdir(out_dir)
    clahe = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8))

    for subdir, dirs, files in os.walk(in_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                img = cv2.imread(file_path, 0)
                resized_img = cv2.resize(img, (target_size, target_size), interpolation = cv2.INTER_CUBIC)
                class_dir = out_dir + os.sep + file_path.split("/")[-2]
                check_and_mkdir(class_dir)

                file_name = class_dir + os.sep + file_path.split("/")[-1]
                print(file_name)

                norm_image = cv2.normalize(resized_img, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F) * 256
                # norm_image = clahe.apply(resized_img)
                cv2.imwrite(file_name, norm_image)

# count the direct one-step sub directories (which will represent the class name)
def class_info(in_dir, mode):
    class_lst = []

    for subdir, dirs, files in os.walk(in_dir):
        class_lst = dirs # the 'dirs' variable after the first os.walk loop will return the list of classes
        break

    if(mode == "len"):
        return (len(class_lst))
    elif(mode == "list"):
        return (class_lst)

# count the containing images of each classes
def count_each_class(in_dir):
    class_lst, cnt_lst = class_info(in_dir, "list"), []

    for class_dir in class_lst:
        class_count = 0
        for subdir, dirs, files in os.walk(in_dir + os.sep + class_dir):
            for f in files:
                file_path = subdir + os.sep + f
                if (is_image(f)):
                    class_count += 1

        print("\t| {:<15} {:>5}".format(class_dir, class_count))
        cnt_lst.append(class_count)

    return cnt_lst

# return whether the current phase is 'train' or 'validation'
def return_phase(num, val_num):
    if (num < val_num):
        return "val" + os.sep
    else:
        return "train" + os.sep

# create a train-val sub-organized directory from the original class directory
def create_train_val_split(in_dir, split_dir, split=cf.split):
    print("Saving train-val splitted images into %s" %(split_dir))
    check_and_mkdir(split_dir)
    class_lst = class_info(in_dir, "list")

    for phase in ["train", "val"]:
        phase_dir = split_dir + os.sep + phase # The output directory will be "./split/[:file_dir]/[:phase]/[:class]"
        check_and_mkdir(phase_dir)

        for cls in class_lst:
            cls_dir = split_dir + os.sep + phase + os.sep + cls # Where to read the image from
            check_and_mkdir(cls_dir)

    # val_num = cf.val_num # temporary
    for subdir, dirs, files in os.walk(in_dir):
        val_num = int(len(files)*cf.val_ratio) if (split=='ratio') else cf.val_num
        cnt = 0
        for f in files:
            file_path = subdir + os.sep + f
            if(is_image(f)):
                img = cv2.imread(file_path)
                cv2.imwrite(split_dir + os.sep + return_phase(cnt, val_num) + subdir.split("/")[-1] + os.sep + f, img)
                cnt += 1

    return split_dir

# get train-val information
def get_split_info(split_dir):
    # Must be activated after the 'split' option.
    for phase in ["train", "val"]:
        print("| %s set : " %phase)
        count_each_class(split_dir + os.sep + phase)

    return split_dir

# train data augmentation
def aug_train(split_dir, mode):
    train_dir = split_dir + os.sep + "train"

    for subdir, dirs, files in os.walk(train_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                print(file_path)
                name, ext = os.path.splitext(f)
                img = cv2.imread(file_path)
                for i in range(1,4):
                    rot_dir = (subdir + os.sep + name + "_aug_"+str(i*90)+ext)
                    if(mode == 'random'):
                        cv2.imwrite(rot_dir, aug.rotation(img, 0, 'random'))
                    elif(mode == 'strict'):
                        cv2.imwrite(rot_dir, aug.rotation(img, i, 'strict'))
                    else:
                        print("The mode should be either random | strict")
                        sys.exit(1)

def train_mean(split_dir):
    train_dir = split_dir + os.sep + "train"
    train_img_num = 0
    train_mean_lst = [0.0, 0.0, 0.0]

    for subdir, dirs, files in os.walk(train_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                img = cv2.imread(file_path)
                train_img_num += 1
                for channel in range(3):
                    train_mean_lst[channel] += img[:,:,channel].mean()

    mean_map = map(div, train_mean_lst, [train_img_num, train_img_num, train_img_num])
    return map(div, mean_map, [255.0, 255.0, 255.0])

def train_std(split_dir, train_mean):
    train_dir = split_dir + os.sep + "train"
    train_img_num = 0
    train_std_lst = [0.0, 0.0, 0.0]

    for subdir, dirs, files in os.walk(train_dir):
        for f in files:
            file_path = subdir + os.sep + f
            if (is_image(f)):
                img = cv2.imread(file_path)
                train_img_num += 1
                for channel in range(3):
                    train_std_lst[channel] += img[:,:,channel].var() # per image var()

    std_map = map(div, train_std_lst, [train_img_num, train_img_num, train_img_num])
    std_map = np.sqrt(std_map)
    return map(div, std_map, [255.0, 255.0, 255.0])