From 239295e1f77e740cf4e2290c7788f5b60c2b4df6 Mon Sep 17 00:00:00 2001
From: Denny
Date: Sun, 23 Apr 2017 19:59:02 -0400
Subject: [PATCH 1/2] base files to work on

---
 da_main.py      | 228 ++++++++++++++++++++++++++++++++++++++++++++++++
 da_tf.py        |  83 ++++++++++++++++++
 noise_manage.py |  70 ++++++++++++++
 3 files changed, 381 insertions(+)
 create mode 100644 da_main.py
 create mode 100644 da_tf.py
 create mode 100644 noise_manage.py

diff --git a/da_main.py b/da_main.py
new file mode 100644
index 0000000..803dfe9
--- /dev/null
+++ b/da_main.py
@@ -0,0 +1,228 @@
+
+"""
+Created on Apr 22, 2017
+
+@author: denny
+"""
+
+
+
+from sklearn import datasets
+import da_tf
+import matplotlib
+from matplotlib import pyplot as plt
+import pickle
+import numpy as np
+from noise_manage import noisy
+import os, os.path
+import cv2
+import sys
+
+def unpickle(file):
+    fo = open(file, 'rb')
+    dict = pickle.load(fo)
+    fo.close()
+    return dict
+
+def grayscale(a):
+    return a.reshape(a.shape[0], 3, 32, 32).mean(1).reshape(a.shape[0], -1)
+
+g_r = 0
+g_c = 0
+g_mean = 0
+g_sigma10 = (10.0,10.0,10.0)
+g_sigma15 = (15.0,15.0,15.0)
+g_sigma25 = (25.0,25.0,25.0)
+g_sigma35 = (35.0,35.0,35.0)
+g_sigma45 = (45.0,45.0,45.0)
+g_sigma50 = (50.0,50.0,50.0)
+
+path = "/home/denny/NYU/IMAGE/imagedata/all"
+
+
+def create_noisy_patches(patches):
+    noisy_patches = []
+
+# noisy_patch = []
+    for patch in patches:
+        noise_levels = 0
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma10))
+
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma15))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma25))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma35))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma45))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma50))
+        noise_levels += 1
+# noisy_patches.append(noisy_patch)
+    return noise_levels, noisy_patches
+
+
+def create_dataset_patches(images, patchshape, patch_shift=4):
+    """
+    Extract patches of the given shape from the input. patch_shift
+    is the stride between successive patch locations.
+    """
+    rowstart = 0; colstart = 0
+
+    patches = []
+    for active in images:
+        rowstart = 0
+        while rowstart < active.shape[0] - patchshape[0]:
+
+            colstart = 0
+            while colstart < active.shape[1] - patchshape[1]:
+                # Slice tuple indexing the region of our proposed patch
+                region = (slice(rowstart, rowstart + patchshape[0]),
+                          slice(colstart, colstart + patchshape[1]))
+
+                # The actual pixels in that region.
+                patch = active[region]
+
+                # Accept the patch.
+                patches.append(patch)
+                colstart += patch_shift
+
+
+            rowstart += patch_shift
+
+
+    return patches
+
+def plot_patches(patches, fignum=None, low=0, high=0):
+    """
+    Given a stack of 2D patches indexed by the first dimension, plot the
+    patches in subplots.
+
+    'low' and 'high' are optional arguments to control which patches
+    actually get plotted. 'fignum' chooses the figure to plot in.
+    """
+    try:
+        istate = plt.isinteractive()
+        plt.ioff()
+        if fignum is None:
+            fig = plt.gcf()
+        else:
+            fig = plt.figure(fignum)
+        if high == 0:
+            high = len(patches)
+# pmin, pmax = patches.min(), patches.max()
+        dims = np.ceil(np.sqrt(high - low))
+        for idx in xrange(high - low):
+            spl = plt.subplot(dims, dims, idx + 1)
+            ax = plt.axis('off')
+            im = plt.imshow(patches[idx], cmap=matplotlib.cm.gray)
+# cl = plt.clim(pmin, pmax)
+        plt.show()
+    finally:
+        plt.interactive(istate)
+
+def load_images_from_folder(folder):
+    """
+    Given a folder, load all the images into a matrix (one flattened
+    image per row) and return it.
+    """
+    imgs = []
+    valid_images = [".jpg",".pbm",".png",".ppm"]
+    for f in os.listdir(folder):
+        ext = os.path.splitext(f)[1]
+        if ext.lower() not in valid_images:
+            print "NOTE: ", f, " excluded from dataset"
+            continue
+# imgs.append(Image.open(os.path.join(folder,f)))
+
+        image = cv2.imread(os.path.join(folder,f), cv2.IMREAD_COLOR)
+
+        # swap to RGB format
+        red = image[:,:,2].copy()
+        blue = image[:,:,0].copy()
+        image[:,:,0] = red
+        image[:,:,2] = blue
+        image = np.asarray(image)
+        global g_r
+        global g_c
+        if g_r == 0:
+            g_r,g_c,d = image.shape
+
+        norm_image = gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+# norm_image = cv2.normalize(gray_image,
+# alpha=0,
+# beta=1,
+# dst = norm_image,
+# norm_type=cv2.NORM_MINMAX,
+# dtype=cv2.CV_32F)
+
+        image_vector = norm_image.flatten()
+        if len(imgs) > 0:
+            imgs = np.vstack((imgs, image_vector))
+
+        else:
+            imgs = image_vector
+
+
+# imgs.append(image_vector)
+
+    imgs = np.matrix(imgs)
+    return imgs
+
+def main():
+
+# hidden_dim = 1
+# data = datasets.load_iris().data
+# input_dim = len(data[0])
+# ae = da_tf.Autoencoder(input_dim, hidden_dim)
+# ae.train(data)
+# ae.test([[8, 4, 6, 2]])
+
+
+
+# names = unpickle('./cifar-10-batches-py/batches.meta')['label_names']
+# data, labels = [], []
+# for i in range(1, 6):
+#     filename = './cifar-10-batches-py/data_batch_' + str(i)
+#     batch_data = unpickle(filename)
+#     if len(data) > 0:
+#         data = np.vstack((data, batch_data['data']))
+#         labels = np.hstack((labels, batch_data['labels']))
+#     else:
+#         data = batch_data['data']
+#         labels = batch_data['labels']
+#
+# data = grayscale(data)
+
+    images = load_images_from_folder(path)
+    print "shape of input images = ", images.shape
+# data = grayscale(images)
+#
+# x = np.matrix(data)
+
+
+    print('Some examples of images we will feed to the autoencoder for training')
+# plt.rcParams['figure.figsize'] = (10, 10)
+# num_examples = 5
+# global g_r; global g_c
+# for i in range(num_examples):
+#     in_image = np.reshape(images[i], (g_r, g_c))
+#     print in_image.shape
+#     plt.subplot(1, num_examples, i+1)
+#     plt.imshow(in_image, cmap='Greys_r')
+#     plt.show()
+    print "our data = " , images
+
+    input_dim = np.shape(images)[1]
+    print "our input_dim = " , input_dim
+
+    hidden_dim = 100
+    ae = da_tf.Denoiser(input_dim, hidden_dim)
+    ae.train(images)


+if __name__ == "__main__":
+    main()
+
+
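A note on the helpers above: create_noisy_patches emits six noisy copies of every input patch, one per g_sigma* level, so it returns the pair (noise levels per patch, list of 6 * len(patches) noisy patches). A minimal check of that bookkeeping (the 8x8x3 zero patches below are stand-ins, not repo data):

    import numpy as np
    from da_main import create_noisy_patches

    # Four dummy colour patches; any (row, col, 3) float arrays work here.
    patches = [np.zeros((8, 8, 3), dtype=np.float32) for _ in range(4)]
    levels, noisy_patches = create_noisy_patches(patches)
    print(levels)              # 6 noise levels applied to each patch
    print(len(noisy_patches))  # 4 * 6 = 24 noisy patches
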
diff --git a/da_tf.py b/da_tf.py
new file mode 100644
index 0000000..4ad18b6
--- /dev/null
+++ b/da_tf.py
@@ -0,0 +1,83 @@
+
+"""
+Created on Apr 22, 2017
+
+@author: denny
+"""
+
+import tensorflow as tf
+import numpy as np
+import time
+
+def get_batch(X, Xn, size):
+    a = np.random.choice(len(X), size, replace=False)
+
+    return X[a], Xn[a]
+
+class Denoiser:
+
+    def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_rate=0.001):
+        self.epoch = epoch
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+
+        self.x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
+        self.x_noised = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_noised')
+        with tf.name_scope('encode'):
+            self.weights1 = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
+            self.biases1 = tf.Variable(tf.zeros([hidden_dim]), name='biases')
+            self.encoded = tf.nn.sigmoid(tf.matmul(self.x_noised, self.weights1) + self.biases1, name='encoded')
+        with tf.name_scope('decode'):
+            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
+            biases = tf.Variable(tf.zeros([input_dim]), name='biases')
+            self.decoded = tf.matmul(self.encoded, weights) + biases
+        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.x, self.decoded))))
+        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
+        self.saver = tf.train.Saver()
+
+    def add_noise(self, data):
+        noise_type = 'gaussian'
+        if noise_type == 'gaussian':
+            print "shape of data = ", np.shape(data)
+            n = np.random.normal(0, 0.1, np.shape(data))
+            return data + n
+        if 'mask' in noise_type:
+            frac = float(noise_type.split('-')[1])
+            temp = np.copy(data)
+            for i in temp:
+                n = np.random.choice(len(i), round(frac * len(i)), replace=False)
+                i[n] = 0
+            return temp
+
+    def train(self, data):
+        data_noised = self.add_noise(data)
+        with open('log.csv', 'w') as writer:
+            with tf.Session() as sess:
+                sess.run(tf.global_variables_initializer())
+                for i in range(self.epoch):
+                    for j in range(50):
+                        batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)
+                        l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data, self.x_noised: batch_data_noised})
+                    if i % 10 == 0:
+                        print('epoch {0}: loss = {1}'.format(i, l))
+                        self.saver.save(sess, './model.ckpt')
+                        epoch_time = int(time.time())
+                        row_str = str(epoch_time) + ',' + str(i) + ',' + str(l) + '\n'
+                        writer.write(row_str)
+                        writer.flush()
+                self.saver.save(sess, './model.ckpt')
+
+    def test(self, data):
+        with tf.Session() as sess:
+            self.saver.restore(sess, './model.ckpt')
+            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
+            print('input', data)
+            print('compressed', hidden)
+            print('reconstructed', reconstructed)
+            return reconstructed
+
+    def get_params(self):
+        with tf.Session() as sess:
+            self.saver.restore(sess, './model.ckpt')
+            weights, biases = sess.run([self.weights1, self.biases1])
+            return weights, biases
\ No newline at end of file
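The Denoiser above is a single-hidden-layer denoising autoencoder: the noised batch is pushed through encode/decode and the RMS error against the clean batch is minimized. A minimal smoke test of this patch's version (random rows stand in for real flattened patches, and epoch is lowered from the 10000 default just to keep the run short):

    import numpy as np
    import da_tf

    # 200 fake flattened 64x64 patches; real data would come from images.
    data = np.random.rand(200, 4096).astype('float32')
    ae = da_tf.Denoiser(input_dim=4096, hidden_dim=100, epoch=10)
    ae.train(data)            # writes ./model.ckpt and log.csv
    out = ae.test(data[:5])   # restores the checkpoint and prints reconstructions
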
diff --git a/noise_manage.py b/noise_manage.py
new file mode 100644
index 0000000..b625740
--- /dev/null
+++ b/noise_manage.py
@@ -0,0 +1,70 @@
+'''
+Created on Apr 3, 2017
+
+@author: denny
+'''
+
+
+#
+# Parameters
+# ----------
+# image : ndarray
+#     Input image data. Will be converted to float.
+# mode : str
+#     One of the following strings, selecting the type of noise to add:
+#
+#     'gauss'     Gaussian-distributed additive noise.
+#     'poisson'   Poisson-distributed noise generated from the data.
+#     's&p'       Replaces random pixels with 0 or 1.
+#     'speckle'   Multiplicative noise using out = image + n*image, where
+#                 n is uniform noise with specified mean & variance.
+
+
+import numpy as np
+import os, os.path
+import cv2
+import sys
+
+
+def noisy(noise_typ, image, _mean, _sigma):
+    if noise_typ == "gauss":
+        row,col,ch = image.shape
+        mean = _mean
+        sigma = _sigma
+# gauss = np.random.normal(mean,sigma,(row,col,ch))
+        gauss = np.zeros((row,col,ch))
+        cv2.randn(gauss, mean, sigma)
+        gauss = gauss.reshape(row,col,ch)
+
+        noisy = image + gauss
+        noisy = cv2.normalize(noisy, noisy, 0, 1, cv2.NORM_MINMAX, dtype=cv2.CV_32FC3)
+        noisy = np.array(noisy)
+        return noisy
+    elif noise_typ == "s&p":
+        row,col,ch = image.shape
+        s_vs_p = 0.5
+        amount = 0.004
+        out = np.copy(image)
+        # Salt mode
+        num_salt = np.ceil(amount * image.size * s_vs_p)
+        coords = [np.random.randint(0, i - 1, int(num_salt))
+                  for i in image.shape]
+        out[coords] = 1
+
+        # Pepper mode
+        num_pepper = np.ceil(amount * image.size * (1. - s_vs_p))
+        coords = [np.random.randint(0, i - 1, int(num_pepper))
+                  for i in image.shape]
+        out[coords] = 0
+        return out
+    elif noise_typ == "poisson":
+        vals = len(np.unique(image))
+        vals = 2 ** np.ceil(np.log2(vals))
+        noisy = np.random.poisson(image * vals) / float(vals)
+        return noisy
+    elif noise_typ == "speckle":
+        row,col,ch = image.shape
+        gauss = np.random.randn(row,col,ch)
+        gauss = gauss.reshape(row,col,ch)
+        noisy = image + image * gauss
+        return noisy
\ No newline at end of file
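noisy() follows the mode table in its header comment; for the 'gauss' branch it expects a (row, col, ch) image plus a mean and a per-channel sigma such as the g_sigma* tuples defined in da_main.py, and it hands back a float image rescaled into [0, 1]. A quick usage sketch (the random image is a stand-in):

    import numpy as np
    from noise_manage import noisy

    # Stand-in 64x64 RGB image with values in [0, 255].
    image = (np.random.rand(64, 64, 3) * 255).astype(np.float32)
    noised = noisy("gauss", image, 0, (25.0, 25.0, 25.0))  # cf. g_sigma25
    print(noised.shape)  # (64, 64, 3), values normalized into [0, 1]
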
From e6a7e3a39d1b04ca671f87235c902f6cf0a7ab27 Mon Sep 17 00:00:00 2001
From: Denny
Date: Sun, 23 Apr 2017 21:38:37 -0400
Subject: [PATCH 2/2] adding running version with patch support

---
 da_main.py | 100 ++++++++++++++++++++++------------------------
 da_tf.py   |  33 +++++++++++-------
 2 files changed, 63 insertions(+), 70 deletions(-)

diff --git a/da_main.py b/da_main.py
index 803dfe9..9f41c41 100644
--- a/da_main.py
+++ b/da_main.py
@@ -36,6 +36,7 @@ def grayscale(a):
 g_sigma35 = (35.0,35.0,35.0)
 g_sigma45 = (45.0,45.0,45.0)
 g_sigma50 = (50.0,50.0,50.0)
+g_shape = (64,64)
 
 path = "/home/denny/NYU/IMAGE/imagedata/all"
 
@@ -63,7 +64,7 @@ def create_noisy_patches(patches):
 
     return noise_levels, noisy_patches
 
 
-def create_dataset_patches(images, patchshape, patch_shift=4):
+def create_dataset_patches(image, patchshape, patch_shift=4):
     """
     Extract patches of the given shape from the input. patch_shift
     is the stride between successive patch locations.
     """
     rowstart = 0; colstart = 0
 
     patches = []
-    for active in images:
-        rowstart = 0
-        while rowstart < active.shape[0] - patchshape[0]:
-
-            colstart = 0
-            while colstart < active.shape[1] - patchshape[1]:
-                # Slice tuple indexing the region of our proposed patch
-                region = (slice(rowstart, rowstart + patchshape[0]),
-                          slice(colstart, colstart + patchshape[1]))
-
-                # The actual pixels in that region.
-                patch = active[region]
-
-                # Accept the patch.
-                patches.append(patch)
-                colstart += patch_shift
+    active = image
+    rowstart = 0
+    while rowstart < active.shape[0] - patchshape[0]:
+
+        colstart = 0
+        while colstart < active.shape[1] - patchshape[1]:
+            # Slice tuple indexing the region of our proposed patch
+            region = (slice(rowstart, rowstart + patchshape[0]),
+                      slice(colstart, colstart + patchshape[1]))
 
-            rowstart += patch_shift
+            # The actual pixels in that region.
+            patch = active[region]
 
+            # Accept the patch.
+            patch_vector = patch.flatten()
+            if len(patches) > 0:
+                patches = np.vstack((patches, patch_vector))
+            else:
+                patches = patch_vector
+
+# patches.append(patch)
+            colstart += patch_shift
+
+
+        rowstart += patch_shift
 
-
+    patches = np.matrix(patches)
     return patches
 
 def plot_patches(patches, fignum=None, low=0, high=0):
@@ -155,13 +162,14 @@ def load_images_from_folder(folder):
 # dst = norm_image,
 # norm_type=cv2.NORM_MINMAX,
 # dtype=cv2.CV_32F)
-
-        image_vector = norm_image.flatten()
+        patches = create_dataset_patches(norm_image, g_shape, patch_shift=32)
+        num_patches = len(patches)
+# print "no of patches made:", num_patches, patches.shape
+
         if len(imgs) > 0:
-            imgs = np.vstack((imgs, image_vector))
-
+            imgs = np.vstack((imgs, patches))
         else:
-            imgs = image_vector
+            imgs = patches
 
 
 # imgs.append(image_vector)
@@ -170,31 +178,9 @@ def load_images_from_folder(folder):
     imgs = np.matrix(imgs)
     return imgs
 
 def main():
-
-# hidden_dim = 1
-# data = datasets.load_iris().data
-# input_dim = len(data[0])
-# ae = da_tf.Autoencoder(input_dim, hidden_dim)
-# ae.train(data)
-# ae.test([[8, 4, 6, 2]])
-
-
-
-# names = unpickle('./cifar-10-batches-py/batches.meta')['label_names']
-# data, labels = [], []
-# for i in range(1, 6):
-#     filename = './cifar-10-batches-py/data_batch_' + str(i)
-#     batch_data = unpickle(filename)
-#     if len(data) > 0:
-#         data = np.vstack((data, batch_data['data']))
-#         labels = np.hstack((labels, batch_data['labels']))
-#     else:
-#         data = batch_data['data']
-#         labels = batch_data['labels']
-#
-# data = grayscale(data)
 
     images = load_images_from_folder(path)
+    print "no of images made:", len(images), images.shape
     print "shape of input images = ", images.shape
 # data = grayscale(images)
 #
 # x = np.matrix(data)
 
 
     print('Some examples of images we will feed to the autoencoder for training')
-# plt.rcParams['figure.figsize'] = (10, 10)
-# num_examples = 5
-# global g_r; global g_c
-# for i in range(num_examples):
-#     in_image = np.reshape(images[i], (g_r, g_c))
-#     print in_image.shape
-#     plt.subplot(1, num_examples, i+1)
-#     plt.imshow(in_image, cmap='Greys_r')
-#     plt.show()
-    print "our data = " , images
+    plt.rcParams['figure.figsize'] = (10, 10)
+    num_examples = 5
+    global g_r; global g_c
+    for i in range(num_examples):
+        in_image = np.reshape(images[i], g_shape)
+        print in_image.shape
+        plt.subplot(1, num_examples, i+1)
+        plt.imshow(in_image, cmap='Greys_r')
+    plt.show()
 
     input_dim = np.shape(images)[1]
     print "our input_dim = " , input_dim
-
     hidden_dim = 100
     ae = da_tf.Denoiser(input_dim, hidden_dim)
     ae.train(images)
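For a sense of dataset size after this change: the window starts at (0, 0) and advances by patch_shift while it still fits inside the image, so each image contributes about ceil((rows - 64) / 32) * ceil((cols - 64) / 32) rows to the training matrix. A quick check of that arithmetic (the 512x512 size is illustrative, not from the repo):

    import numpy as np
    from da_main import create_dataset_patches, g_shape

    image = np.zeros((512, 512), dtype=np.float32)  # stand-in grayscale image
    patches = create_dataset_patches(image, g_shape, patch_shift=32)
    print(patches.shape)  # (196, 4096): 14 * 14 windows, 64 * 64 pixels each
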
diff --git a/da_tf.py b/da_tf.py
index 4ad18b6..bda952c 100644
--- a/da_tf.py
+++ b/da_tf.py
@@ -7,6 +7,7 @@
 
 import tensorflow as tf
 import numpy as np
+import cv2
 import time
 
 def get_batch(X, Xn, size):
     a = np.random.choice(len(X), size, replace=False)
 
     return X[a], Xn[a]
@@ -35,25 +36,33 @@ def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_r
         self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
         self.saver = tf.train.Saver()
 
-    def add_noise(self, data):
+    def add_noise(self, data, mean, sigma):
         noise_type = 'gaussian'
+        noisy_patches = []
+        i = 1
         if noise_type == 'gaussian':
-            print "shape of data = ", np.shape(data)
-            n = np.random.normal(0, 0.1, np.shape(data))
-            return data + n
-        if 'mask' in noise_type:
-            frac = float(noise_type.split('-')[1])
-            temp = np.copy(data)
-            for i in temp:
-                n = np.random.choice(len(i), round(frac * len(i)), replace=False)
-                i[n] = 0
-            return temp
+            for patch in data:
+                print "On ",i,"/",len(data),"\r",
+                i += 1
+                n = np.random.normal(mean, sigma, np.shape(patch))
+                added = patch + n
+                cv2.normalize(added, added, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_32FC3)
+# noisy_patches.append(added)
+                if len(noisy_patches) > 0:
+                    noisy_patches = np.vstack((noisy_patches, added))
+                else:
+                    noisy_patches = added
+
+        noisy_patches = np.matrix(noisy_patches)
+        return noisy_patches
 
     def train(self, data):
-        data_noised = self.add_noise(data)
+        print "Adding Noise!"
+        data_noised = self.add_noise(data, 0, 10)
         with open('log.csv', 'w') as writer:
             with tf.Session() as sess:
                 sess.run(tf.global_variables_initializer())
+                print "Training to denoise"
                 for i in range(self.epoch):
                     for j in range(50):
                         batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)
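Taken together, the pipeline after these two patches is: load images, grayscale them, cut 64x64 patches at stride 32, add sigma-10 Gaussian noise inside Denoiser.train, and fit the autoencoder on (clean, noised) patch pairs. Once ./model.ckpt exists, denoising unseen data should look roughly like this sketch ('my_image.png' is a placeholder path, not a file from the repo):

    import cv2
    import numpy as np
    from matplotlib import pyplot as plt
    import da_tf
    from da_main import create_dataset_patches, g_shape

    # Grayscale the input and cut it into the same 64x64/stride-32 patches
    # used during training.
    gray = cv2.cvtColor(cv2.imread('my_image.png'), cv2.COLOR_BGR2GRAY)
    patches = create_dataset_patches(gray, g_shape, patch_shift=32)

    ae = da_tf.Denoiser(np.shape(patches)[1], 100)  # same dims as training
    denoised = ae.test(patches)                     # restores ./model.ckpt

    # Inspect the first reconstructed patch.
    plt.imshow(np.reshape(denoised[0], g_shape), cmap='Greys_r')
    plt.show()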