pyoungkangkim
diff --git a/‎06_02_qa_train.ipynb
+50-81 b/‎06_02_qa_train.ipynb
+50-81
diff --git a/‎06_03_qa_analysis.ipynb
+91-129 b/‎06_03_qa_analysis.ipynb
+91-129
diff --git a/‎model.png
-88 Bytes b/‎model.png
-88 Bytes
diff --git a/‎scripts/download_gutenburg_data.txt renamed to ‎scripts/download_gutenburg_data.sh b/‎scripts/download_gutenburg_data.txt renamed to ‎scripts/download_gutenburg_data.sh
diff --git a/‎utils/.gitignore
-2 b/‎utils/.gitignore
-2
diff --git a/‎utils/callbacks.py
+40 b/‎utils/callbacks.py
+40
diff --git a/‎utils/loaders.py
+315 b/‎utils/loaders.py
+315
@@ -0,0 +1,40 @@
+from keras.callbacks import Callback, LearningRateScheduler
+import numpy as np
+import matplotlib.pyplot as plt
+import os
+
+#### CALLBACKS
+class CustomCallback(Callback):
+    
+    def __init__(self, run_folder, print_every_n_batches, initial_epoch, vae):
+        self.epoch = initial_epoch
+        self.run_folder = run_folder
+        self.print_every_n_batches = print_every_n_batches
+        self.vae = vae
+
+    def on_batch_end(self, batch, logs={}):  
+        if batch % self.print_every_n_batches == 0:
+            z_new = np.random.normal(size = (1,self.vae.z_dim))
+            reconst = self.vae.decoder.predict(np.array(z_new))[0].squeeze()
+
+            filepath = os.path.join(self.run_folder, 'images/img_' + str(self.epoch).zfill(3) + '_' + str(batch) + '.jpg')
+            if len(reconst.shape) == 2:
+                plt.imsave(filepath, reconst, cmap='gray_r')
+            else:
+                plt.imsave(filepath, reconst)
+
+    def on_epoch_begin(self, epoch, logs={}):
+        self.epoch += 1
+
+
+
+def step_decay_schedule(initial_lr, decay_factor=0.5, step_size=1):
+    '''
+    Wrapper function to create a LearningRateScheduler with step decay schedule.
+    '''
+    def schedule(epoch):
+        new_lr = initial_lr * (decay_factor ** np.floor(epoch/step_size))
+        
+        return new_lr
+
+    return LearningRateScheduler(schedule)
@@ -0,0 +1,315 @@
+import pickle
+import os
+
+from keras.datasets import mnist, cifar100,cifar10
+from keras.preprocessing.image import ImageDataGenerator, load_img, save_img, img_to_array
+
+import pandas as pd
+
+import numpy as np
+from os import walk, getcwd
+import h5py
+
+import scipy
+from glob import glob
+
+from keras.applications import vgg19
+from keras import backend as K
+from keras.utils import to_categorical
+
+import pdb
+
+
+class ImageLabelLoader():
+    def __init__(self, image_folder, target_size):
+        self.image_folder = image_folder
+        self.target_size = target_size
+
+    def build(self, att, batch_size, label = None):
+
+        data_gen = ImageDataGenerator(rescale=1./255)
+        if label:
+            data_flow = data_gen.flow_from_dataframe(
+                att
+                , self.image_folder
+                , x_col='image_id'
+                , y_col=label
+                , target_size=self.target_size 
+                , class_mode='other'
+                , batch_size=batch_size
+                , shuffle=True
+            )
+        else:
+            data_flow = data_gen.flow_from_dataframe(
+                att
+                , self.image_folder
+                , x_col='image_id'
+                , target_size=self.target_size 
+                , class_mode='input'
+                , batch_size=batch_size
+                , shuffle=True
+            )
+
+        return data_flow
+
+
+
+
+class DataLoader():
+    def __init__(self, dataset_name, img_res=(256, 256)):
+        self.dataset_name = dataset_name
+        self.img_res = img_res
+
+    def load_data(self, domain, batch_size=1, is_testing=False):
+        data_type = "train%s" % domain if not is_testing else "test%s" % domain
+        path = glob('./data/%s/%s/*' % (self.dataset_name, data_type))
+
+        batch_images = np.random.choice(path, size=batch_size)
+
+        imgs = []
+        for img_path in batch_images:
+            img = self.imread(img_path)
+            if not is_testing:
+                img = scipy.misc.imresize(img, self.img_res)
+
+                if np.random.random() > 0.5:
+                    img = np.fliplr(img)
+            else:
+                img = scipy.misc.imresize(img, self.img_res)
+            imgs.append(img)
+
+        imgs = np.array(imgs)/127.5 - 1.
+
+        return imgs
+
+    def load_batch(self, batch_size=1, is_testing=False):
+        data_type = "train" if not is_testing else "val"
+        path_A = glob('./data/%s/%sA/*' % (self.dataset_name, data_type))
+        path_B = glob('./data/%s/%sB/*' % (self.dataset_name, data_type))
+
+        self.n_batches = int(min(len(path_A), len(path_B)) / batch_size)
+        total_samples = self.n_batches * batch_size
+
+        # Sample n_batches * batch_size from each path list so that model sees all
+        # samples from both domains
+        path_A = np.random.choice(path_A, total_samples, replace=False)
+        path_B = np.random.choice(path_B, total_samples, replace=False)
+
+        for i in range(self.n_batches-1):
+            batch_A = path_A[i*batch_size:(i+1)*batch_size]
+            batch_B = path_B[i*batch_size:(i+1)*batch_size]
+            imgs_A, imgs_B = [], []
+            for img_A, img_B in zip(batch_A, batch_B):
+                img_A = self.imread(img_A)
+                img_B = self.imread(img_B)
+
+                img_A = scipy.misc.imresize(img_A, self.img_res)
+                img_B = scipy.misc.imresize(img_B, self.img_res)
+
+                if not is_testing and np.random.random() > 0.5:
+                        img_A = np.fliplr(img_A)
+                        img_B = np.fliplr(img_B)
+
+                imgs_A.append(img_A)
+                imgs_B.append(img_B)
+
+            imgs_A = np.array(imgs_A)/127.5 - 1.
+            imgs_B = np.array(imgs_B)/127.5 - 1.
+
+            yield imgs_A, imgs_B
+
+    def load_img(self, path):
+        img = self.imread(path)
+        img = scipy.misc.imresize(img, self.img_res)
+        img = img/127.5 - 1.
+        return img[np.newaxis, :, :, :]
+
+    def imread(self, path):
+        return scipy.misc.imread(path, mode='RGB').astype(np.float)
+
+
+
+
+def load_model(model_class, folder):
+    
+    with open(os.path.join(folder, 'params.pkl'), 'rb') as f:
+        params = pickle.load(f)
+
+    model = model_class(*params)
+
+    model.load_weights(os.path.join(folder, 'weights/weights.h5'))
+
+    return model
+
+
+def load_mnist():
+    (x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+    x_train = x_train.astype('float32') / 255.
+    x_train = x_train.reshape(x_train.shape + (1,))
+    x_test = x_test.astype('float32') / 255.
+    x_test = x_test.reshape(x_test.shape + (1,))
+
+    return (x_train, y_train), (x_test, y_test)
+
+def load_mnist_gan():
+    (x_train, y_train), (x_test, y_test) = mnist.load_data()
+
+    x_train = (x_train.astype('float32') - 127.5) / 127.5
+    x_train = x_train.reshape(x_train.shape + (1,))
+    x_test = (x_test.astype('float32') - 127.5) / 127.5
+    x_test = x_test.reshape(x_test.shape + (1,))
+
+    return (x_train, y_train), (x_test, y_test)
+
+
+
+def load_fashion_mnist(input_rows, input_cols, path='./data/fashion/fashion-mnist_train.csv'):
+    #read the csv data
+    df = pd.read_csv(path)
+    #extract the image pixels
+    X_train = df.drop(columns = ['label'])
+    X_train = X_train.values
+    X_train = (X_train.astype('float32') - 127.5) / 127.5
+    X_train = X_train.reshape(X_train.shape[0], input_rows, input_cols, 1)
+    #extract the labels
+    y_train = df['label'].values
+    
+    return X_train, y_train
+
+def load_safari(folder):
+
+    mypath = os.path.join("./data", folder)
+    txt_name_list = []
+    for (dirpath, dirnames, filenames) in walk(mypath):
+        for f in filenames:
+            if f != '.DS_Store':
+                txt_name_list.append(f)
+                break
+
+    slice_train = int(80000/len(txt_name_list))  ###Setting value to be 80000 for the final dataset
+    i = 0
+    seed = np.random.randint(1, 10e6)
+
+    for txt_name in txt_name_list:
+        txt_path = os.path.join(mypath,txt_name)
+        x = np.load(txt_path)
+        x = (x.astype('float32') - 127.5) / 127.5
+        # x = x.astype('float32') / 255.0
+        
+        x = x.reshape(x.shape[0], 28, 28, 1)
+        
+        y = [i] * len(x)  
+        np.random.seed(seed)
+        np.random.shuffle(x)
+        np.random.seed(seed)
+        np.random.shuffle(y)
+        x = x[:slice_train]
+        y = y[:slice_train]
+        if i != 0: 
+            xtotal = np.concatenate((x,xtotal), axis=0)
+            ytotal = np.concatenate((y,ytotal), axis=0)
+        else:
+            xtotal = x
+            ytotal = y
+        i += 1
+        
+    return xtotal, ytotal
+
+
+
+def load_cifar(label, num):
+    if num == 10:
+        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    else:
+        (x_train, y_train), (x_test, y_test) = cifar100.load_data(label_mode = 'fine')
+
+    train_mask = [y[0]==label for y in y_train]
+    test_mask = [y[0]==label for y in y_test]
+
+    x_data = np.concatenate([x_train[train_mask], x_test[test_mask]])
+    y_data = np.concatenate([y_train[train_mask], y_test[test_mask]])
+
+    x_data = (x_data.astype('float32') - 127.5) / 127.5
+ 
+    return (x_data, y_data)
+
+
+def load_celeb(data_name, image_size, batch_size):
+    data_folder = os.path.join("./data", data_name)
+
+    data_gen = ImageDataGenerator(preprocessing_function=lambda x: (x.astype('float32') - 127.5) / 127.5)
+
+    x_train = data_gen.flow_from_directory(data_folder
+                                            , target_size = (image_size,image_size)
+                                            , batch_size = batch_size
+                                            , shuffle = True
+                                            , class_mode = 'input'
+                                            , subset = "training"
+                                                )
+
+    return x_train
+
+
+def load_music(data_name, filename, n_bars, n_steps_per_bar):
+    file = os.path.join("./data", data_name, filename)
+
+    with np.load(file, encoding='bytes') as f:
+        data = f['train']
+
+    data_ints = []
+
+    for x in data:
+        counter = 0
+        cont = True
+        while cont:
+            if not np.any(np.isnan(x[counter:(counter+4)])):
+                cont = False
+            else:
+                counter += 4
+
+        if n_bars * n_steps_per_bar < x.shape[0]:
+            data_ints.append(x[counter:(counter + (n_bars * n_steps_per_bar)),:])
+
+
+    data_ints = np.array(data_ints)
+
+    n_songs = data_ints.shape[0]
+    n_tracks = data_ints.shape[2]
+
+    data_ints = data_ints.reshape([n_songs, n_bars, n_steps_per_bar, n_tracks])
+
+    max_note = 83
+
+    where_are_NaNs = np.isnan(data_ints)
+    data_ints[where_are_NaNs] = max_note + 1
+    max_note = max_note + 1
+
+    data_ints = data_ints.astype(int)
+
+    num_classes = max_note + 1
+
+    
+    data_binary = np.eye(num_classes)[data_ints]
+    data_binary[data_binary==0] = -1
+    data_binary = np.delete(data_binary, max_note,-1)
+
+    data_binary = data_binary.transpose([0,1,2, 4,3])
+    
+    
+
+    
+
+    return data_binary, data_ints, data
+
+
+def preprocess_image(data_name, file, img_nrows, img_ncols):
+
+    image_path = os.path.join('./data', data_name, file)
+
+    img = load_img(image_path, target_size=(img_nrows, img_ncols))
+    img = img_to_array(img)
+    img = np.expand_dims(img, axis=0)
+    img = vgg19.preprocess_input(img)
+    return img
+