From 239295e1f77e740cf4e2290c7788f5b60c2b4df6 Mon Sep 17 00:00:00 2001
From: Denny
Date: Sun, 23 Apr 2017 19:59:02 -0400
Subject: [PATCH 1/2] base files to work on

---
 da_main.py      | 228 ++++++++++++++++++++++++++++++++++++++++++++++++
 da_tf.py        |  83 ++++++++++++++++++
 noise_manage.py |  70 ++++++++++++++
 3 files changed, 381 insertions(+)
 create mode 100644 da_main.py
 create mode 100644 da_tf.py
 create mode 100644 noise_manage.py

diff --git a/da_main.py b/da_main.py
new file mode 100644
index 0000000..803dfe9
--- /dev/null
+++ b/da_main.py
@@ -0,0 +1,228 @@
+
+"""
+Created on Apr 22, 2017
+
+@author: denny
+"""
+
+
+
+from sklearn import datasets
+import da_tf
+import matplotlib
+from matplotlib import pyplot as plt
+import pickle
+import numpy as np
+from noise_manage import noisy
+import os, os.path
+import cv2
+import sys
+
+def unpickle(file):
+    fo = open(file, 'rb')
+    dict = pickle.load(fo)
+    fo.close()
+    return dict
+
+def grayscale(a):
+    return a.reshape(a.shape[0], 3, 32, 32).mean(1).reshape(a.shape[0], -1)
+
+g_r = 0
+g_c = 0
+g_mean = 0
+g_sigma10 = (10.0,10.0,10.0)
+g_sigma15 = (15.0,15.0,15.0)
+g_sigma25 = (25.0,25.0,25.0)
+g_sigma35 = (35.0,35.0,35.0)
+g_sigma45 = (45.0,45.0,45.0)
+g_sigma50 = (50.0,50.0,50.0)
+
+path = "/home/denny/NYU/IMAGE/imagedata/all"
+
+
+def create_noisy_patches(patches):
+    noisy_patches = []
+
+# noisy_patch = []
+    for patch in patches:
+        noise_levels = 0
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma10))
+
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma15))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma25))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma35))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma45))
+        noise_levels += 1
+        noisy_patches.append(noisy("gauss", patch, g_mean, g_sigma50))
+        noise_levels += 1
+# noisy_patches.append(noisy_patch)
+    return noise_levels, noisy_patches
+
+
+def create_dataset_patches(images, patchshape, patch_shift=4):
+    """
+    Extract patches of the given shape from the input. patch_shift
+    is the stride between successive patch locations.
+    """
+    rowstart = 0; colstart = 0
+
+    patches = []
+    for active in images:
+        rowstart = 0
+        while rowstart < active.shape[0] - patchshape[0]:
+
+            colstart = 0
+            while colstart < active.shape[1] - patchshape[1]:
+                # Slice tuple indexing the region of our proposed patch
+                region = (slice(rowstart, rowstart + patchshape[0]),
+                          slice(colstart, colstart + patchshape[1]))
+
+                # The actual pixels in that region.
+                patch = active[region]
+
+                # Accept the patch.
+                patches.append(patch)
+                colstart += patch_shift
+
+
+            rowstart += patch_shift
+
+
+    return patches
+
+def plot_patches(patches, fignum=None, low=0, high=0):
+    """
+    Given a stack of 2D patches indexed by the first dimension, plot the
+    patches in subplots.
+
+    'low' and 'high' are optional arguments to control which patches
+    actually get plotted. 'fignum' chooses the figure to plot in.
+    """
+    try:
+        istate = plt.isinteractive()
+        plt.ioff()
+        if fignum is None:
+            fig = plt.gcf()
+        else:
+            fig = plt.figure(fignum)
+        if high == 0:
+            high = len(patches)
+# pmin, pmax = patches.min(), patches.max()
+        dims = np.ceil(np.sqrt(high - low))
+        for idx in xrange(high - low):
+            spl = plt.subplot(dims, dims, idx + 1)
+            ax = plt.axis('off')
+            im = plt.imshow(patches[idx], cmap=matplotlib.cm.gray)
+# cl = plt.clim(pmin, pmax)
+        plt.show()
+    finally:
+        plt.interactive(istate)
+
+def load_images_from_folder(folder):
+    """
+    Given a folder, load all the images into a matrix (one flattened
+    image per row) and return it.
+    """
+    imgs = []
+    valid_images = [".jpg",".pbm",".png",".ppm"]
+    for f in os.listdir(folder):
+        ext = os.path.splitext(f)[1]
+        if ext.lower() not in valid_images:
+            print "NOTE: ", f, " excluded from dataset"
+            continue
+# imgs.append(Image.open(os.path.join(folder,f)))
+
+        image = cv2.imread(os.path.join(folder,f), cv2.IMREAD_COLOR)
+
+        # swap to RGB format
+        red = image[:,:,2].copy()
+        blue = image[:,:,0].copy()
+        image[:,:,0] = red
+        image[:,:,2] = blue
+        image = np.asarray(image)
+        global g_r
+        global g_c
+        if g_r == 0:
+            g_r,g_c,d = image.shape
+
+        norm_image = gray_image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
+# norm_image = cv2.normalize(gray_image,
+# alpha=0,
+# beta=1,
+# dst = norm_image,
+# norm_type=cv2.NORM_MINMAX,
+# dtype=cv2.CV_32F)
+
+        image_vector = norm_image.flatten()
+        if len(imgs) > 0:
+            imgs = np.vstack((imgs, image_vector))
+
+        else:
+            imgs = image_vector
+
+
+# imgs.append(image_vector)
+
+    imgs = np.matrix(imgs)
+    return imgs
+
+def main():
+
+# hidden_dim = 1
+# data = datasets.load_iris().data
+# input_dim = len(data[0])
+# ae = da_tf.Autoencoder(input_dim, hidden_dim)
+# ae.train(data)
+# ae.test([[8, 4, 6, 2]])
+
+
+
+# names = unpickle('./cifar-10-batches-py/batches.meta')['label_names']
+# data, labels = [], []
+# for i in range(1, 6):
+#     filename = './cifar-10-batches-py/data_batch_' + str(i)
+#     batch_data = unpickle(filename)
+#     if len(data) > 0:
+#         data = np.vstack((data, batch_data['data']))
+#         labels = np.hstack((labels, batch_data['labels']))
+#     else:
+#         data = batch_data['data']
+#         labels = batch_data['labels']
+#
+# data = grayscale(data)
+
+    images = load_images_from_folder(path)
+    print "shape of input images = ", images.shape
+# data = grayscale(images)
+#
+# x = np.matrix(data)
+
+
+    print('Some examples of images we will feed to the autoencoder for training')
+# plt.rcParams['figure.figsize'] = (10, 10)
+# num_examples = 5
+# global g_r; global g_c
+# for i in range(num_examples):
+#     in_image = np.reshape(images[i], (g_r, g_c))
+#     print in_image.shape
+#     plt.subplot(1, num_examples, i+1)
+#     plt.imshow(in_image, cmap='Greys_r')
+#     plt.show()
+    print "our data = " , images
+
+    input_dim = np.shape(images)[1]
+    print "our input_dim = " , input_dim
+
+    hidden_dim = 100
+    ae = da_tf.Denoiser(input_dim, hidden_dim)
+    ae.train(images)


+if __name__ == "__main__":
+    main()
+
+
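A note on the helpers above: create_noisy_patches emits six noisy copies of every input patch, one per g_sigma* level, so it returns the pair (noise levels per patch, list of 6 * len(patches) noisy patches). A minimal check of that bookkeeping (the 8x8x3 zero patches below are stand-ins, not repo data):

    import numpy as np
    from da_main import create_noisy_patches

    # Four dummy colour patches; any (row, col, 3) float arrays work here.
    patches = [np.zeros((8, 8, 3), dtype=np.float32) for _ in range(4)]
    levels, noisy_patches = create_noisy_patches(patches)
    print(levels)              # 6 noise levels applied to each patch
    print(len(noisy_patches))  # 4 * 6 = 24 noisy patches
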
diff --git a/da_tf.py b/da_tf.py
new file mode 100644
index 0000000..4ad18b6
--- /dev/null
+++ b/da_tf.py
@@ -0,0 +1,83 @@
+
+"""
+Created on Apr 22, 2017
+
+@author: denny
+"""
+
+import tensorflow as tf
+import numpy as np
+import time
+
+def get_batch(X, Xn, size):
+    a = np.random.choice(len(X), size, replace=False)
+
+    return X[a], Xn[a]
+
+class Denoiser:
+
+    def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_rate=0.001):
+        self.epoch = epoch
+        self.batch_size = batch_size
+        self.learning_rate = learning_rate
+
+        self.x = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x')
+        self.x_noised = tf.placeholder(dtype=tf.float32, shape=[None, input_dim], name='x_noised')
+        with tf.name_scope('encode'):
+            self.weights1 = tf.Variable(tf.random_normal([input_dim, hidden_dim], dtype=tf.float32), name='weights')
+            self.biases1 = tf.Variable(tf.zeros([hidden_dim]), name='biases')
+            self.encoded = tf.nn.sigmoid(tf.matmul(self.x_noised, self.weights1) + self.biases1, name='encoded')
+        with tf.name_scope('decode'):
+            weights = tf.Variable(tf.random_normal([hidden_dim, input_dim], dtype=tf.float32), name='weights')
+            biases = tf.Variable(tf.zeros([input_dim]), name='biases')
+            self.decoded = tf.matmul(self.encoded, weights) + biases
+        self.loss = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.x, self.decoded))))
+        self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
+        self.saver = tf.train.Saver()
+
+    def add_noise(self, data):
+        noise_type = 'gaussian'
+        if noise_type == 'gaussian':
+            print "shape of data = ", np.shape(data)
+            n = np.random.normal(0, 0.1, np.shape(data))
+            return data + n
+        if 'mask' in noise_type:
+            frac = float(noise_type.split('-')[1])
+            temp = np.copy(data)
+            for i in temp:
+                n = np.random.choice(len(i), round(frac * len(i)), replace=False)
+                i[n] = 0
+            return temp
+
+    def train(self, data):
+        data_noised = self.add_noise(data)
+        with open('log.csv', 'w') as writer:
+            with tf.Session() as sess:
+                sess.run(tf.global_variables_initializer())
+                for i in range(self.epoch):
+                    for j in range(50):
+                        batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)
+                        l, _ = sess.run([self.loss, self.train_op], feed_dict={self.x: batch_data, self.x_noised: batch_data_noised})
+                    if i % 10 == 0:
+                        print('epoch {0}: loss = {1}'.format(i, l))
+                        self.saver.save(sess, './model.ckpt')
+                        epoch_time = int(time.time())
+                        row_str = str(epoch_time) + ',' + str(i) + ',' + str(l) + '\n'
+                        writer.write(row_str)
+                        writer.flush()
+                self.saver.save(sess, './model.ckpt')
+
+    def test(self, data):
+        with tf.Session() as sess:
+            self.saver.restore(sess, './model.ckpt')
+            hidden, reconstructed = sess.run([self.encoded, self.decoded], feed_dict={self.x: data})
+            print('input', data)
+            print('compressed', hidden)
+            print('reconstructed', reconstructed)
+            return reconstructed
+
+    def get_params(self):
+        with tf.Session() as sess:
+            self.saver.restore(sess, './model.ckpt')
+            weights, biases = sess.run([self.weights1, self.biases1])
+            return weights, biases
\ No newline at end of file
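The Denoiser above is a single-hidden-layer denoising autoencoder: the noised batch is pushed through encode/decode and the RMS error against the clean batch is minimized. A minimal smoke test of this patch's version (random rows stand in for real flattened patches, and epoch is lowered from the 10000 default just to keep the run short):

    import numpy as np
    import da_tf

    # 200 fake flattened 64x64 patches; real data would come from images.
    data = np.random.rand(200, 4096).astype('float32')
    ae = da_tf.Denoiser(input_dim=4096, hidden_dim=100, epoch=10)
    ae.train(data)            # writes ./model.ckpt and log.csv
    out = ae.test(data[:5])   # restores the checkpoint and prints reconstructions
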
diff --git a/noise_manage.py b/noise_manage.py
new file mode 100644
index 0000000..b625740
--- /dev/null
+++ b/noise_manage.py
@@ -0,0 +1,70 @@
+'''
+Created on Apr 3, 2017
+
+@author: denny
+'''
+
+
+#
+# Parameters
+# ----------
+# image : ndarray
+#     Input image data. Will be converted to float.
+# mode : str
+#     One of the following strings, selecting the type of noise to add:
+#
+#     'gauss'     Gaussian-distributed additive noise.
+#     'poisson'   Poisson-distributed noise generated from the data.
+#     's&p'       Replaces random pixels with 0 or 1.
+#     'speckle'   Multiplicative noise using out = image + n*image, where
+#                 n is uniform noise with specified mean & variance.
+
+
+import numpy as np
+import os, os.path
+import cv2
+import sys
+
+
+def noisy(noise_typ, image, _mean, _sigma):
+    if noise_typ == "gauss":
+        row,col,ch = image.shape
+        mean = _mean
+        sigma = _sigma
+# gauss = np.random.normal(mean,sigma,(row,col,ch))
+        gauss = np.zeros((row,col,ch))
+        cv2.randn(gauss, mean, sigma)
+        gauss = gauss.reshape(row,col,ch)
+
+        noisy = image + gauss
+        noisy = cv2.normalize(noisy, noisy, 0, 1, cv2.NORM_MINMAX, dtype=cv2.CV_32FC3)
+        noisy = np.array(noisy)
+        return noisy
+    elif noise_typ == "s&p":
+        row,col,ch = image.shape
+        s_vs_p = 0.5
+        amount = 0.004
+        out = np.copy(image)
+        # Salt mode
+        num_salt = np.ceil(amount * image.size * s_vs_p)
+        coords = [np.random.randint(0, i - 1, int(num_salt))
+                  for i in image.shape]
+        out[coords] = 1
+
+        # Pepper mode
+        num_pepper = np.ceil(amount * image.size * (1. - s_vs_p))
+        coords = [np.random.randint(0, i - 1, int(num_pepper))
+                  for i in image.shape]
+        out[coords] = 0
+        return out
+    elif noise_typ == "poisson":
+        vals = len(np.unique(image))
+        vals = 2 ** np.ceil(np.log2(vals))
+        noisy = np.random.poisson(image * vals) / float(vals)
+        return noisy
+    elif noise_typ == "speckle":
+        row,col,ch = image.shape
+        gauss = np.random.randn(row,col,ch)
+        gauss = gauss.reshape(row,col,ch)
+        noisy = image + image * gauss
+        return noisy
\ No newline at end of file
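noisy() follows the mode table in its header comment; for the 'gauss' branch it expects a (row, col, ch) image plus a mean and a per-channel sigma such as the g_sigma* tuples defined in da_main.py, and it hands back a float image rescaled into [0, 1]. A quick usage sketch (the random image is a stand-in):

    import numpy as np
    from noise_manage import noisy

    # Stand-in 64x64 RGB image with values in [0, 255].
    image = (np.random.rand(64, 64, 3) * 255).astype(np.float32)
    noised = noisy("gauss", image, 0, (25.0, 25.0, 25.0))  # cf. g_sigma25
    print(noised.shape)  # (64, 64, 3), values normalized into [0, 1]
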
From e6a7e3a39d1b04ca671f87235c902f6cf0a7ab27 Mon Sep 17 00:00:00 2001
From: Denny
Date: Sun, 23 Apr 2017 21:38:37 -0400
Subject: [PATCH 2/2] adding running version with patch support

---
 da_main.py | 100 ++++++++++++++++++++++------------------------
 da_tf.py   |  33 +++++++++++-------
 2 files changed, 63 insertions(+), 70 deletions(-)

diff --git a/da_main.py b/da_main.py
index 803dfe9..9f41c41 100644
--- a/da_main.py
+++ b/da_main.py
@@ -36,6 +36,7 @@ def grayscale(a):
 g_sigma35 = (35.0,35.0,35.0)
 g_sigma45 = (45.0,45.0,45.0)
 g_sigma50 = (50.0,50.0,50.0)
+g_shape = (64,64)
 
 path = "/home/denny/NYU/IMAGE/imagedata/all"
 
@@ -63,7 +64,7 @@ def create_noisy_patches(patches):
 
     return noise_levels, noisy_patches
 
 
-def create_dataset_patches(images, patchshape, patch_shift=4):
+def create_dataset_patches(image, patchshape, patch_shift=4):
     """
     Extract patches of the given shape from the input. patch_shift
     is the stride between successive patch locations.
     """
     rowstart = 0; colstart = 0
 
     patches = []
-    for active in images:
-        rowstart = 0
-        while rowstart < active.shape[0] - patchshape[0]:
-
-            colstart = 0
-            while colstart < active.shape[1] - patchshape[1]:
-                # Slice tuple indexing the region of our proposed patch
-                region = (slice(rowstart, rowstart + patchshape[0]),
-                          slice(colstart, colstart + patchshape[1]))
-
-                # The actual pixels in that region.
-                patch = active[region]
-
-                # Accept the patch.
-                patches.append(patch)
-                colstart += patch_shift
+    active = image
+    rowstart = 0
+    while rowstart < active.shape[0] - patchshape[0]:
+
+        colstart = 0
+        while colstart < active.shape[1] - patchshape[1]:
+            # Slice tuple indexing the region of our proposed patch
+            region = (slice(rowstart, rowstart + patchshape[0]),
+                      slice(colstart, colstart + patchshape[1]))
 
-            rowstart += patch_shift
+            # The actual pixels in that region.
+            patch = active[region]
 
+            # Accept the patch.
+            patch_vector = patch.flatten()
+            if len(patches) > 0:
+                patches = np.vstack((patches, patch_vector))
+            else:
+                patches = patch_vector
+
+# patches.append(patch)
+            colstart += patch_shift
+
+
+        rowstart += patch_shift
 
-
+    patches = np.matrix(patches)
     return patches
 
 def plot_patches(patches, fignum=None, low=0, high=0):
@@ -155,13 +162,14 @@ def load_images_from_folder(folder):
 # dst = norm_image,
 # norm_type=cv2.NORM_MINMAX,
 # dtype=cv2.CV_32F)
-
-        image_vector = norm_image.flatten()
+        patches = create_dataset_patches(norm_image, g_shape, patch_shift=32)
+        num_patches = len(patches)
+# print "no of patches made:", num_patches, patches.shape
+
         if len(imgs) > 0:
-            imgs = np.vstack((imgs, image_vector))
-
+            imgs = np.vstack((imgs, patches))
         else:
-            imgs = image_vector
+            imgs = patches
 
 
 # imgs.append(image_vector)
@@ -170,31 +178,9 @@ def load_images_from_folder(folder):
     imgs = np.matrix(imgs)
     return imgs
 
 def main():
-
-# hidden_dim = 1
-# data = datasets.load_iris().data
-# input_dim = len(data[0])
-# ae = da_tf.Autoencoder(input_dim, hidden_dim)
-# ae.train(data)
-# ae.test([[8, 4, 6, 2]])
-
-
-
-# names = unpickle('./cifar-10-batches-py/batches.meta')['label_names']
-# data, labels = [], []
-# for i in range(1, 6):
-#     filename = './cifar-10-batches-py/data_batch_' + str(i)
-#     batch_data = unpickle(filename)
-#     if len(data) > 0:
-#         data = np.vstack((data, batch_data['data']))
-#         labels = np.hstack((labels, batch_data['labels']))
-#     else:
-#         data = batch_data['data']
-#         labels = batch_data['labels']
-#
-# data = grayscale(data)
 
     images = load_images_from_folder(path)
+    print "no of images made:", len(images), images.shape
     print "shape of input images = ", images.shape
 # data = grayscale(images)
 #
 # x = np.matrix(data)
 
 
     print('Some examples of images we will feed to the autoencoder for training')
-# plt.rcParams['figure.figsize'] = (10, 10)
-# num_examples = 5
-# global g_r; global g_c
-# for i in range(num_examples):
-#     in_image = np.reshape(images[i], (g_r, g_c))
-#     print in_image.shape
-#     plt.subplot(1, num_examples, i+1)
-#     plt.imshow(in_image, cmap='Greys_r')
-#     plt.show()
-    print "our data = " , images
+    plt.rcParams['figure.figsize'] = (10, 10)
+    num_examples = 5
+    global g_r; global g_c
+    for i in range(num_examples):
+        in_image = np.reshape(images[i], g_shape)
+        print in_image.shape
+        plt.subplot(1, num_examples, i+1)
+        plt.imshow(in_image, cmap='Greys_r')
+    plt.show()
 
     input_dim = np.shape(images)[1]
     print "our input_dim = " , input_dim
-
     hidden_dim = 100
     ae = da_tf.Denoiser(input_dim, hidden_dim)
     ae.train(images)
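For a sense of dataset size after this change: the window starts at (0, 0) and advances by patch_shift while it still fits inside the image, so each image contributes about ceil((rows - 64) / 32) * ceil((cols - 64) / 32) rows to the training matrix. A quick check of that arithmetic (the 512x512 size is illustrative, not from the repo):

    import numpy as np
    from da_main import create_dataset_patches, g_shape

    image = np.zeros((512, 512), dtype=np.float32)  # stand-in grayscale image
    patches = create_dataset_patches(image, g_shape, patch_shift=32)
    print(patches.shape)  # (196, 4096): 14 * 14 windows, 64 * 64 pixels each
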
diff --git a/da_tf.py b/da_tf.py
index 4ad18b6..bda952c 100644
--- a/da_tf.py
+++ b/da_tf.py
@@ -7,6 +7,7 @@
 
 import tensorflow as tf
 import numpy as np
+import cv2
 import time
 
 def get_batch(X, Xn, size):
     a = np.random.choice(len(X), size, replace=False)
 
     return X[a], Xn[a]
@@ -35,25 +36,33 @@ def __init__(self, input_dim, hidden_dim, epoch=10000, batch_size=50, learning_r
         self.train_op = tf.train.AdamOptimizer(self.learning_rate).minimize(self.loss)
         self.saver = tf.train.Saver()
 
-    def add_noise(self, data):
+    def add_noise(self, data, mean, sigma):
         noise_type = 'gaussian'
+        noisy_patches = []
+        i = 1
         if noise_type == 'gaussian':
-            print "shape of data = ", np.shape(data)
-            n = np.random.normal(0, 0.1, np.shape(data))
-            return data + n
-        if 'mask' in noise_type:
-            frac = float(noise_type.split('-')[1])
-            temp = np.copy(data)
-            for i in temp:
-                n = np.random.choice(len(i), round(frac * len(i)), replace=False)
-                i[n] = 0
-            return temp
+            for patch in data:
+                print "On ",i,"/",len(data),"\r",
+                i += 1
+                n = np.random.normal(mean, sigma, np.shape(patch))
+                added = patch + n
+                cv2.normalize(added, added, 0, 255, cv2.NORM_MINMAX, dtype=cv2.CV_32FC3)
+# noisy_patches.append(added)
+                if len(noisy_patches) > 0:
+                    noisy_patches = np.vstack((noisy_patches, added))
+                else:
+                    noisy_patches = added
+
+        noisy_patches = np.matrix(noisy_patches)
+        return noisy_patches
 
     def train(self, data):
-        data_noised = self.add_noise(data)
+        print "Adding Noise!"
+        data_noised = self.add_noise(data, 0, 10)
         with open('log.csv', 'w') as writer:
             with tf.Session() as sess:
                 sess.run(tf.global_variables_initializer())
+                print "Training to denoise"
                 for i in range(self.epoch):
                     for j in range(50):
                         batch_data, batch_data_noised = get_batch(data, data_noised, self.batch_size)
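Taken together, the pipeline after these two patches is: load images, grayscale them, cut 64x64 patches at stride 32, add sigma-10 Gaussian noise inside Denoiser.train, and fit the autoencoder on (clean, noised) patch pairs. Once ./model.ckpt exists, denoising unseen data should look roughly like this sketch ('my_image.png' is a placeholder path, not a file from the repo):

    import cv2
    import numpy as np
    from matplotlib import pyplot as plt
    import da_tf
    from da_main import create_dataset_patches, g_shape

    # Grayscale the input and cut it into the same 64x64/stride-32 patches
    # used during training.
    gray = cv2.cvtColor(cv2.imread('my_image.png'), cv2.COLOR_BGR2GRAY)
    patches = create_dataset_patches(gray, g_shape, patch_shift=32)

    ae = da_tf.Denoiser(np.shape(patches)[1], 100)  # same dims as training
    denoised = ae.test(patches)                     # restores ./model.ckpt

    # Inspect the first reconstructed patch.
    plt.imshow(np.reshape(denoised[0], g_shape), cmap='Greys_r')
    plt.show()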