|
| 1 | +""" |
| 2 | +Modified from: https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly.html |
| 3 | +""" |
| 4 | +import numpy as np |
| 5 | + |
class DataGenerator(object):
    """Endless batch generator that loads ``np.load``-able samples for Keras.

    Each sample ID is a file name; the file is loaded from
    ``dataroot + ID`` and its class label is parsed out of the ID itself
    (see ``__data_generation``).
    """

    def __init__(self, batch_size=32, Y_encoder=None, shuffle=True, dataroot=None):
        """Store the generator configuration.

        Args:
            batch_size: Number of samples in every yielded batch.
            Y_encoder: Object exposing ``transform([label])`` whose first
                returned element is the integer class index (presumably a
                fitted scikit-learn ``LabelEncoder`` -- confirm with caller).
            shuffle: When truthy, the sample order is reshuffled at the
                start of every pass over the IDs.
            dataroot: String prefix concatenated (plain ``+``, no path
                separator is inserted) in front of each ID to build the
                file path. ``None`` is treated as an empty prefix.
        """
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.Y_encoder = Y_encoder
        self.dataroot = dataroot

    def generate(self, list_IDs):
        """Yield ``(X, Y)`` batches forever.

        Only full batches are produced; the ``len(list_IDs) % batch_size``
        trailing samples of each pass are dropped. A batch whose files
        cannot be read (``IOError``) is skipped.

        Args:
            list_IDs: Sequence of sample IDs (file names).

        Yields:
            Tuple ``(X, Y)`` as returned by ``__data_generation``.

        Raises:
            ValueError: If ``batch_size`` is smaller than 1 or there are
                fewer IDs than ``batch_size``. Without this check the loop
                below would never yield and spin forever.
        """
        batch_size = self.batch_size
        if batch_size < 1:
            raise ValueError(
                'batch_size must be at least 1, got %r' % (batch_size,))
        if len(list_IDs) < batch_size:
            raise ValueError(
                'need at least batch_size=%d IDs to build a batch, got %d'
                % (batch_size, len(list_IDs)))

        while True:
            # New exploration order for every pass over the dataset.
            indexes = self.__get_exploration_order(list_IDs)

            n_batches = len(indexes) // batch_size
            for i in range(n_batches):
                batch_indexes = indexes[i * batch_size:(i + 1) * batch_size]
                list_IDs_temp = [list_IDs[k] for k in batch_indexes]

                try:
                    X, Y = self.__data_generation(list_IDs_temp)
                except IOError:
                    # Best effort: an unreadable file drops the whole
                    # batch instead of aborting training.
                    continue

                yield X, Y

    def __get_exploration_order(self, list_IDs):
        """Return the positions ``0..len(list_IDs)-1``, shuffled if enabled."""
        indexes = np.arange(len(list_IDs))
        if self.shuffle:
            np.random.shuffle(indexes)

        return indexes

    def __data_generation(self, list_IDs_temp):
        """Load, crop and transform the samples of one batch.

        Args:
            list_IDs_temp: IDs of the samples in this batch. Each ID is
                expected to end in a 4-character extension and to contain
                at least four ``_``-separated fields; fields 2 and 3 joined
                by ``_`` form the class label.

        Returns:
            Tuple ``(X, Y)``. ``X`` has shape
            ``(len(list_IDs_temp), x_dim, y_dim - 1, 1)`` and ``Y`` is the
            one-hot label matrix produced by the module-level ``sparsify``.

        Raises:
            IOError: If a sample file cannot be read.
            ValueError: If a loaded sample is not 2-D, or the samples do
                not all share the same number of rows.
        """
        prefix = self.dataroot or ''
        loaded_X = [np.load(prefix + ID) for ID in list_IDs_temp]
        for ID, sample in zip(list_IDs_temp, loaded_X):
            if sample.ndim != 2:
                raise ValueError(
                    'sample %r must be 2-D, got shape %r' % (ID, sample.shape))

        # Minimum per axis. ``min`` over the shape tuples would compare
        # them lexicographically and could pick a column count that is
        # larger than the narrowest sample.
        x_dim = min(sample.shape[0] for sample in loaded_X)
        y_dim = min(sample.shape[1] for sample in loaded_X)
        # Every sample is cropped to one column fewer than the narrowest
        # one (kept from the original implementation; reason not visible
        # here -- TODO confirm).
        n_cols = y_dim - 1

        n_samples = len(list_IDs_temp)
        X = np.empty((n_samples, x_dim, n_cols, 1))
        Y = np.empty(n_samples, dtype=int)

        # Zeros are replaced by the smallest normal float16 so that log10
        # stays finite.
        tiny = np.finfo(dtype='float16').tiny

        for i, (ID, sample) in enumerate(zip(list_IDs_temp, loaded_X)):
            ref_X = sample[..., :n_cols]
            ref_X[ref_X == 0] = tiny
            # Only these two row bands are moved to log scale.
            ref_X[:201] = np.log10(ref_X[:201])
            ref_X[402:603] = np.log10(ref_X[402:603])
            # Store the sample in the single channel of the batch tensor.
            X[i, :, :, 0] = ref_X

            # Strip the extension, then rebuild the label from fields 2-3.
            split = ID[:-4].split('_')
            Y[i] = self.Y_encoder.transform([split[2] + '_' + split[3]])[0]

        return X, sparsify(Y)
| 75 | + |
def sparsify(y, n_classes=168):
    """Return the one-hot (binary) encoding of integer class labels.

    Args:
        y: 1-D array-like of class indices.
        n_classes: Number of columns in the result. Defaults to 168, the
            value previously hard-coded here.

    Returns:
        Integer NumPy array of shape ``(len(y), n_classes)`` where row
        ``i`` holds a 1 in column ``y[i]`` and 0 elsewhere. A label outside
        ``range(n_classes)`` yields an all-zero row.
    """
    labels = np.asarray(y)
    # Broadcast each label against every class index in one comparison.
    return (labels[:, np.newaxis] == np.arange(n_classes)).astype(int)
| 81 | + |
0 commit comments