diff --git a/Code/dsen2cr_main.py b/Code/dsen2cr_main.py new file mode 100644 index 0000000..449e38e --- /dev/null +++ b/Code/dsen2cr_main.py @@ -0,0 +1,179 @@ +from __future__ import division + +import argparse +import random + +import keras.backend as K +import numpy as np +import tensorflow as tf +import tools.image_metrics as img_met +from dsen2cr_network import DSen2CR_model +from dsen2cr_tools import train_dsen2cr, predict_dsen2cr +from keras.optimizers import Nadam +from keras.utils import multi_gpu_model +from tools.dataIO import get_train_val_test_filelists + +K.set_image_data_format('channels_first') + + +def run_dsen2cr(predict_file=None, resume_file=None): + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% SETUP %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + # TODO implement external hyperparam config file + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Setup model %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + model_name = 'DSen2-CR_001' # model name for training + + # model parameters + num_layers = 16 # B value in paper + feature_size = 256 # F value in paper + + # include the SAR layers as input to model + include_sar_input = True + + # cloud mask parameters + use_cloud_mask = True + cloud_threshold = 0.2 # set threshold for binarisation of cloud mask + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Setup data processing param %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + # input data preprocessing parameters + scale = 2000 + max_val_sar = 2 + clip_min = [[-25.0, -32.5], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] + clip_max = [[0, 0], [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000], + [10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000, 10000]] + + shuffle_train = True # shuffle images at training time + data_augmentation = True # flip and rotate images randomly for data augmentation + + random_crop = True # crop out a part of the input image randomly + crop_size = 128 # crop size for training images + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Setup training %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + dataset_list_filepath = '../Data/datasetfilelist.csv' + + base_out_path = '/path/to/output/model_runs/' + input_data_folder = '/path/to/dataset/parent/folder' + + # training parameters + initial_epoch = 0 # start at epoch number + epochs_nr = 8 # train for this amount of epochs. Checkpoints will be generated at the end of each epoch + batch_size = 16 # training batch size to distribute over GPUs + + # define metric to be optimized + loss = img_met.carl_error + # define metrics to monitor + metrics = [img_met.carl_error, img_met.cloud_mean_absolute_error, + img_met.cloud_mean_squared_error, img_met.cloud_mean_sam, img_met.cloud_mean_absolute_error_clear, + img_met.cloud_psnr, + img_met.cloud_root_mean_squared_error, img_met.cloud_bandwise_root_mean_squared_error, + img_met.cloud_mean_absolute_error_covered, img_met.cloud_ssim, + img_met.cloud_mean_sam_covered, img_met.cloud_mean_sam_clear] + + # define learning rate + lr = 7e-5 + + # initialize optimizer + optimizer = Nadam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=1e-8, schedule_decay=0.004) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Other setup parameters %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + predict_data_type = 'val' # possible options: 'val' or 'test' + + log_step_freq = 1 # frequency of logging + + n_gpus = 1 # set number of GPUs + # multiprocessing optimization setup + use_multi_processing = True + max_queue_size = 2 * n_gpus + workers = 4 * n_gpus + + batch_per_gpu = int(batch_size / n_gpus) + + input_shape = ((13, crop_size, crop_size), (2, crop_size, crop_size)) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Initialize session %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + # Configure Tensorflow session + config = tf.ConfigProto() + # Don't pre-allocate memory; allocate as-needed + config.gpu_options.allow_growth = True + + # Only allow a total % of the GPU memory to be allocated + # config.gpu_options.per_process_gpu_memory_fraction = 0.3 + + # Create a session with the above options specified. + K.tensorflow_backend.set_session(tf.Session(config=config)) + + # Set random seeds for repeatability + random_seed_general = 42 + random.seed(random_seed_general) # random package + np.random.seed(random_seed_general) # numpy package + tf.set_random_seed(random_seed_general) # tensorflow + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Initialize model %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + # single or no-gpu case + if n_gpus <= 1: + model, shape_n = DSen2CR_model(input_shape, + batch_per_gpu=batch_per_gpu, + num_layers=num_layers, + feature_size=feature_size, + use_cloud_mask=use_cloud_mask, + include_sar_input=include_sar_input) + else: + # handle multiple gpus + with tf.device('/cpu:0'): + single_model, shape_n = DSen2CR_model(input_shape, + batch_per_gpu=batch_per_gpu, + num_layers=num_layers, + feature_size=feature_size, + use_cloud_mask=use_cloud_mask, + include_sar_input=include_sar_input) + + model = multi_gpu_model(single_model, gpus=n_gpus) + + model.compile(optimizer=optimizer, loss=loss, metrics=metrics) + print('Model compiled successfully!') + + print("Getting file lists") + train_filelist, val_filelist, test_filelist = get_train_val_test_filelists(dataset_list_filepath) + + print("Number of train files found: ", len(train_filelist)) + print("Number of validation files found: ", len(val_filelist)) + print("Number of test files found: ", len(test_filelist)) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% PREDICT %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + if predict_file is not None: + if predict_data_type == 'val': + predict_filelist = val_filelist + elif predict_data_type == 'test': + predict_filelist = test_filelist + else: + raise ValueError('Prediction data type not recognized.') + + predict_dsen2cr(predict_file, model, predict_data_type, base_out_path, input_data_folder, predict_filelist, + batch_size, clip_min, clip_max, crop_size, input_shape, use_cloud_mask, cloud_threshold, + max_val_sar, scale) + + else: + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% TRAIN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + train_dsen2cr(model, model_name, base_out_path, resume_file, train_filelist, val_filelist, lr, log_step_freq, + shuffle_train, data_augmentation, random_crop, batch_size, scale, clip_max, clip_min, max_val_sar, + use_cloud_mask, cloud_threshold, crop_size, epochs_nr, initial_epoch, input_data_folder, + input_shape, max_queue_size, use_multi_processing, workers) + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% MAIN %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + +if __name__ == '__main__': + parser = argparse.ArgumentParser(description='DSen2-CR model code') + parser.add_argument('--predict', action='store', dest='predict_file', help='Predict from model checkpoint.') + parser.add_argument('--resume', action='store', dest='resume_file', help='Resume training from model checkpoint.') + args = parser.parse_args() + + run_dsen2cr(args.predict_file, args.resume_file) diff --git a/Code/dsen2cr_network.py b/Code/dsen2cr_network.py new file mode 100644 index 0000000..eb060c4 --- /dev/null +++ b/Code/dsen2cr_network.py @@ -0,0 +1,70 @@ +import keras.backend as K +import tensorflow as tf +from keras.layers import Conv2D, Concatenate, Activation, Lambda, Add +from keras.models import Model, Input + +K.set_image_data_format('channels_first') + + +def resBlock(input_l, feature_size, kernel_size, scale=0.1): + """Definition of Residual Block to be repeated in body of network.""" + tmp = Conv2D(feature_size, kernel_size, kernel_initializer='he_uniform', padding='same')(input_l) + tmp = Activation('relu')(tmp) + tmp = Conv2D(feature_size, kernel_size, kernel_initializer='he_uniform', padding='same')(tmp) + + tmp = Lambda(lambda x: x * scale)(tmp) + + return Add()([input_l, tmp]) + + +def DSen2CR_model(input_shape, + batch_per_gpu=2, + num_layers=32, + feature_size=256, + use_cloud_mask=True, + include_sar_input=True): + """Definition of network structure. """ + + global shape_n + + # define dimensions + input_opt = Input(shape=input_shape[0]) + input_sar = Input(shape=input_shape[1]) + + if include_sar_input: + x = Concatenate(axis=1)([input_opt, input_sar]) + else: + x = input_opt + + # Treat the concatenation + x = Conv2D(feature_size, (3, 3), kernel_initializer='he_uniform', padding='same')(x) + x = Activation('relu')(x) + + # main body of network as succession of resblocks + for i in range(num_layers): + x = resBlock(x, feature_size, kernel_size=[3, 3]) + + # One more convolution + x = Conv2D(input_shape[0][0], (3, 3), kernel_initializer='he_uniform', padding='same')(x) + + # Add first layer (long skip connection) + x = Add()([x, input_opt]) + + if use_cloud_mask: + # the hacky trick with global variables and with lambda functions is needed to avoid errors when + # pickle saving the model. Tensors are not pickable. + # This way, the Lambda function has no special arguments and is "encapsulated" + + shape_n = tf.shape(input_opt) + + def concatenate_array(x): + global shape_n + return K.concatenate([x, K.zeros(shape=(batch_per_gpu, 1, shape_n[2], shape_n[3]))], axis=1) + + x = Concatenate(axis=1)([x, input_opt]) + + x = Lambda(concatenate_array)(x) + + model = Model(inputs=[input_opt, input_sar], outputs=x) + + return model, shape_n diff --git a/Code/dsen2cr_tools.py b/Code/dsen2cr_tools.py new file mode 100644 index 0000000..b0b7f36 --- /dev/null +++ b/Code/dsen2cr_tools.py @@ -0,0 +1,183 @@ +import csv +import os +from random import shuffle + +from dataIO import make_dir, DataGenerator, process_predicted +from keras.callbacks import ModelCheckpoint, CSVLogger +from keras.utils import plot_model +from myCallbacks import CSV_NBatchLogger, NBatchLogger, TensorBoardWrapper + + +def train_dsen2cr(model, model_name, base_out_path, resume_file, train_filelist, val_filelist, lr, log_step_freq, + shuffle_train, data_augmentation, random_crop, batch_size, scale, clip_max, clip_min, max_val_sar, + use_cloud_mask, cloud_threshold, crop_size, epochs_nr, initial_epoch, input_data_folder, input_shape, + max_queue_size, use_multi_processing, workers): + """Start or resume training of DSen2-CR model.""" + + print('Training model name: {}'.format(model_name)) + + out_path_train = make_dir(os.path.join(base_out_path, model_name, '/')) + + # generate model information and metadata + plot_model(model, to_file=os.path.join(out_path_train, model_name + 'model.png'), show_shapes=True, + show_layer_names=True) + model_yaml = model.to_yaml() + with open(out_path_train + model_name + "model.yaml", 'w') as yaml_file: + yaml_file.write(model_yaml) + print("Model information files created at ", out_path_train) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Initialize callbacks %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + # instantiate model checkpoint callback + model_filepath = os.path.join(out_path_train, model_name + '_{epoch:02d}-{val_loss:.4f}' + '.hdf5') + checkpoint = ModelCheckpoint(model_filepath, + monitor='val_loss', + verbose=1, + save_best_only=False, + save_weights_only=False, + mode='auto') + + # instantiate csv logging callback + csv_filepath = os.path.join(out_path_train, model_name + '_csvlog.csv') + csv_logger = CSVLogger(csv_filepath, append=True, separator=";") + csv_batch_logger = CSV_NBatchLogger(1, out_path_train, model_name, initial_epoch, separator=';') + + # instantiate NBatch logger + batch_logger = NBatchLogger(log_step_freq, out_path_train, model_name, initial_epoch, lr) + + # instantiate Tensorboard logger + # extract sample from validation dataset + val_filelist_tensorboard = val_filelist + shuffle(val_filelist_tensorboard) + val_filelist_tensorboard = val_filelist_tensorboard[0:batch_size] + + params = {'input_dim': input_shape, + 'batch_size': batch_size, + 'shuffle': shuffle_train, + 'scale': scale, + 'include_target': True, + 'data_augmentation': False, + 'random_crop': False, + 'crop_size': crop_size, + 'clip_min': clip_min, + 'clip_max': clip_max, + 'input_data_folder': input_data_folder, + 'use_cloud_mask': use_cloud_mask, + 'max_val_sar': max_val_sar, + 'cloud_threshold': cloud_threshold} + val_tensorboard_generator = DataGenerator(val_filelist_tensorboard, **params) + + tensorboard = TensorBoardWrapper(val_tensorboard_generator, input_dim=input_shape, nb_steps=1, + batch_size=batch_size, log_dir=out_path_train, histogram_freq=1, + write_graph=False, + batch_nr=batch_size, write_grads=True, update_freq=500, + learning_phase=False) + + # define callbacks list + callbacks_list = [checkpoint, csv_logger, batch_logger, csv_batch_logger, tensorboard] + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Initialize training %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + params = {'input_dim': input_shape, + 'batch_size': batch_size, + 'shuffle': shuffle_train, + 'scale': scale, + 'include_target': True, + 'data_augmentation': data_augmentation, + 'random_crop': random_crop, + 'crop_size': crop_size, + 'clip_min': clip_min, + 'clip_max': clip_max, + 'input_data_folder': input_data_folder, + 'use_cloud_mask': use_cloud_mask, + 'max_val_sar': max_val_sar, + 'cloud_threshold': cloud_threshold} + training_generator = DataGenerator(train_filelist, **params) + + params = {'input_dim': input_shape, + 'batch_size': batch_size, + 'shuffle': shuffle_train, + 'scale': scale, + 'include_target': True, + 'data_augmentation': False, # keep false + 'random_crop': False, + 'crop_size': crop_size, + 'clip_min': clip_min, + 'clip_max': clip_max, + 'input_data_folder': input_data_folder, + 'use_cloud_mask': use_cloud_mask, + 'max_val_sar': max_val_sar, + 'cloud_threshold': cloud_threshold + } + + validation_generator = DataGenerator(val_filelist, **params) + + # %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% Run training %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + print('Training starts...') + + if resume_file is not None: + print("Will resume from the weights in file {}".format(resume_file)) + model.load_model(resume_file) + + model.fit_generator(generator=training_generator, + validation_data=validation_generator, + epochs=epochs_nr, + verbose=1, + callbacks=callbacks_list, + shuffle=False, + initial_epoch=initial_epoch, + use_multiprocessing=use_multi_processing, + max_queue_size=max_queue_size, + workers=workers) + + +def predict_dsen2cr(predict_file, model, model_name, base_out_path, input_data_folder, predict_filelist, batch_size, + clip_min, clip_max, crop_size, input_shape, use_cloud_mask, cloud_threshold, max_val_sar, + scale): + print("Predicting using file: {}".format(predict_file)) + print("Using this model: {}".format(model_name)) + + # load the model weights at checkpoint + model.load_weights(predict_file) + + out_path_predict = make_dir(os.path.join(base_out_path, model_name)) + predicted_images_path = make_dir(os.path.join(out_path_predict, 'images_output/')) + + print("Initializing generator for prediction and evaluation") + params = {'input_dim': input_shape, + 'batch_size': 1, + 'shuffle': False, + 'scale': scale, + 'include_target': True, + 'data_augmentation': False, + 'random_crop': False, + 'crop_size': crop_size, + 'clip_min': clip_min, + 'clip_max': clip_max, + 'input_data_folder': input_data_folder, + 'use_cloud_mask': use_cloud_mask, + 'max_val_sar': max_val_sar, + 'cloud_threshold': cloud_threshold} + predict_generator = DataGenerator(predict_filelist, **params) + + eval_csv_name = out_path_predict + 'eval.csv' + print("Storing evaluation metrics at ", eval_csv_name) + + with open(eval_csv_name, 'a') as eval_csv_fh: + eval_writer = csv.writer(eval_csv_fh, dialect='excel') + eval_writer.writerow(model.metrics_names) + + for i, (data, y) in enumerate(predict_generator): + print("Processing file number ", i) + # get evaluation metrics + eval_results = model.test_on_batch(data, y) + # write evaluation metrics + eval_writer.writerow(eval_results) + + # predict output image + predicted = model.predict_on_batch(data) + # process predicted image + process_predicted(predicted, predict_filelist[i * batch_size:i * batch_size + batch_size], + predicted_images_path, + scale, cloud_threshold, input_data_folder) + + print("Prediction finished with success!") diff --git a/Code/tools/dataIO.py b/Code/tools/dataIO.py new file mode 100644 index 0000000..2ceb40f --- /dev/null +++ b/Code/tools/dataIO.py @@ -0,0 +1,420 @@ +import csv +import os +import os.path + +import keras +import matplotlib +import numpy as np +import rasterio +import scipy.signal as scisig +from matplotlib import pyplot as plt +from tools.feature_detectors import get_cloud_cloudshadow_mask + + +def make_dir(dir_path): + if os.path.isdir(dir_path): + print("WARNING: Folder {} exists and content may be overwritten!") + else: + os.makedirs(dir_path) + + return dir_path + + +def get_train_val_test_filelists(listpath): + with open(listpath) as f: + reader = csv.reader(f, delimiter='\t') + filelist = list(reader) + + train_filelist = [f[:] for f in filelist if f[0] == '1'] + val_filelist = [f[:] for f in filelist if f[0] == '2'] + test_filelist = [f[:] for f in filelist if f[0] == '3'] + + return train_filelist, val_filelist, test_filelist + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Output%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Output%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Output%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +def get_info_quartet(ID, predicted_images_path, input_data_folder): + scene_name = ID[4] + filepath_sar = os.path.join(input_data_folder, ID[1], ID[4]).lstrip() + filepath_cloudFree = os.path.join(input_data_folder, ID[2], ID[4]).lstrip() + filepath_cloudy = os.path.join(input_data_folder, ID[3], ID[4]).lstrip() + + return scene_name[:-4], filepath_sar, filepath_cloudFree, filepath_cloudy + + +def get_rgb_preview(r, g, b, sar_composite=False): + if not sar_composite: + + # stack and move to zero + rgb = np.dstack((r, g, b)) + rgb = rgb - np.nanmin(rgb) + + # treat saturated images, scale values + if np.nanmax(rgb) == 0: + rgb = 255 * np.ones_like(rgb) + else: + rgb = 255 * (rgb / np.nanmax(rgb)) + + # replace nan values before final conversion + rgb[np.isnan(rgb)] = np.nanmean(rgb) + + return rgb.astype(np.uint8) + + else: + # generate SAR composite + HH = r + HV = g + + HH = np.clip(HH, -25.0, 0) + HH = (HH + 25.1) * 255 / 25.1 + HV = np.clip(HV, -32.5, 0) + HV = (HV + 32.6) * 255 / 32.6 + + rgb = np.dstack((np.zeros_like(HH), HH, HV)) + + return rgb.astype(np.uint8) + + +def get_raw_data(path): + with rasterio.open(path, driver='GTiff') as src: + image = src.read() + + # checkimage for nans + image[np.isnan(image)] = np.nanmean(image) + + return image.astype('float32') + + +def get_preview(file, predicted_file, bands, brighten_limit=None, sar_composite=False): + if not predicted_file: + with rasterio.open(file) as src: + r, g, b = src.read(bands) + else: + # file is actually the predicted array + r = file[bands[0] - 1] + g = file[bands[1] - 1] + b = file[bands[2] - 1] + + if brighten_limit is None: + return get_rgb_preview(r, g, b, sar_composite) + else: + r = np.clip(r, 0, brighten_limit) + g = np.clip(g, 0, brighten_limit) + b = np.clip(b, 0, brighten_limit) + return get_rgb_preview(r, g, b, sar_composite) + + +def generate_output_images(predicted, ID, predicted_images_path, input_data_folder, cloud_threshold): + scene_name, filepath_sar, filepath_cloudFree, filepath_cloudy = get_info_quartet(ID, + predicted_images_path, + input_data_folder) + + print("Generating quartet for ", scene_name) + + sar_preview = get_preview(filepath_sar, False, [1, 2, 2], sar_composite=True) + + opt_bands = [4, 3, 2] # R, G, B bands (S2 channel numbers) + cloudFree_preview = get_preview(filepath_cloudFree, False, opt_bands, brighten_limit=2000) + cloudy_preview = get_preview(filepath_cloudy, False, opt_bands) + cloudy_preview_brightened = get_preview(filepath_cloudy, False, opt_bands, brighten_limit=2000) + + predicted_preview = get_preview(predicted, True, opt_bands, 2000) + + cloud_mask = get_cloud_cloudshadow_mask(get_raw_data(filepath_cloudy), cloud_threshold) + + save_single_images(sar_preview, cloudy_preview, cloudFree_preview, predicted_preview, cloudy_preview_brightened, + cloud_mask, predicted_images_path, scene_name) + + return + + +def save_single_image(image, out_path, name): + plt.figure(frameon=False) + plt.imshow(image) + plt.gca().get_xaxis().set_visible(False) + plt.gca().get_yaxis().set_visible(False) + plt.axis('off') + plt.savefig(os.path.join(out_path, name + '.png'), dpi=600, bbox_inches='tight', pad_inches=0) + plt.close() + + return + + +def save_single_cloudmap(image, out_path, name): + cmap = matplotlib.colors.ListedColormap(['black', 'gray', 'white']) + + bounds = [-1, -0.5, 0.5, 1] + norm = matplotlib.colors.BoundaryNorm(bounds, cmap.N) + + plt.figure() + plt.imshow(image, cmap=cmap, norm=norm, vmin=-1, vmax=1) + + cb = plt.colorbar(aspect=40, pad=0.01) + cb.ax.yaxis.set_tick_params(pad=0.9, length=2) + + cb.ax.yaxis.set_ticks([0.33 / 2, 0.5, 1 - 0.33 / 2]) + cb.ax.yaxis.set_ticklabels(['Shadow', 'Clear', 'Cloud']) + + plt.gca().get_xaxis().set_visible(False) + plt.gca().get_yaxis().set_visible(False) + plt.savefig(os.path.join(out_path, name + '.png'), dpi=600, bbox_inches='tight', pad_inches=0) + plt.close() + + return + + +def save_single_images(sar_preview, cloudy_preview, cloudFree_preview, predicted_preview, cloudy_preview_brightened, + cloud_mask, predicted_images_path, scene_name): + out_path = make_dir(os.path.join(predicted_images_path, scene_name)) + + save_single_image(sar_preview, out_path, "inputsar") + save_single_image(cloudy_preview, out_path, "input") + save_single_image(cloudFree_preview, out_path, "inputtarg") + save_single_image(predicted_preview, out_path, "inputpred") + save_single_image(cloudy_preview_brightened, out_path, "inputbr") + save_single_cloudmap(cloud_mask, out_path, "cloudmask") + + return + + +def process_predicted(predicted, ID, predicted_images_path, scale, cloud_threshold, input_data_folder): + for i, data_image in enumerate(predicted): + data_image *= scale + generate_output_images(data_image, ID[i], predicted_images_path, input_data_folder, cloud_threshold) + + return + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Input%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Input%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% + + +# %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%Input%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%% +class DataGenerator(keras.utils.Sequence): + """DataGenerator for Keras routines.""" + + def __init__(self, + list_IDs, + batch_size=32, + input_dim=((13, 256, 256), (2, 256, 256)), + scale=2000, + shuffle=True, + include_target=True, + data_augmentation=False, + random_crop=False, + crop_size=128, + clip_min=None, + clip_max=None, + folder_input='./', + use_cloud_mask=True, + max_val_sar=5, + cloud_threshold=0.2 + ): + + if clip_min is None: + clip_min = [[-25.0, -32.5], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]] + self.input_dim = input_dim + self.batch_size = batch_size + self.list_IDs = list_IDs + self.nr_images = len(self.list_IDs) + self.indexes = np.arange(self.nr_images) + self.scale = scale + self.shuffle = shuffle + self.include_target = include_target + self.data_augmentation = data_augmentation + self.random_crop = random_crop + self.crop_size = crop_size + self.max_val = max_val_sar + + self.clip_min = clip_min + self.clip_max = clip_max + + self.folder_input = folder_input + self.use_cloud_mask = use_cloud_mask + self.cloud_threshold = cloud_threshold + + self.augment_rotation_param = np.repeat(0, self.nr_images) + self.augment_flip_param = np.repeat(0, self.nr_images) + self.random_crop_paramx = np.repeat(0, self.nr_images) + self.random_crop_paramy = np.repeat(0, self.nr_images) + + self.on_epoch_end() + + print("Generator initialized") + + def __len__(self): + """Gets the number of batches per epoch""" + return int(np.floor(self.nr_images / self.batch_size)) + + def __getitem__(self, index): + """Generate one batch of data""" + # Generate indexes of the batch from shuffled indices list + indexes = self.indexes[index * self.batch_size:(index + 1) * self.batch_size] + + # Find list of IDs + list_IDs_temp = [self.list_IDs[k] for k in indexes] + + if self.include_target: + # Generate data + X, y = self.__data_generation(list_IDs_temp, self.augment_rotation_param[indexes], + self.augment_flip_param[indexes], self.random_crop_paramx[indexes], + self.random_crop_paramy[indexes]) + return X, y + else: + X = self.__data_generation(list_IDs_temp, self.augment_rotation_param[indexes], + self.augment_flip_param[indexes], self.random_crop_paramx[indexes], + self.random_crop_paramy[indexes]) + return X + + def on_epoch_end(self): + """Update indexes after each epoch.""" + + if self.shuffle: + np.random.shuffle(self.indexes) + + if self.data_augmentation: + self.augment_rotation_param = np.random.randint(0, 4, self.nr_images) + self.augment_flip_param = np.random.randint(0, 3, self.nr_images) + + if self.random_crop: + self.random_crop_paramx = np.random.randint(0, self.crop_size, self.nr_images) + self.random_crop_paramy = np.random.randint(0, self.crop_size, self.nr_images) + return + + def __data_generation(self, list_IDs_temp, augment_rotation_param_temp, augment_flip_param_temp, + random_crop_paramx_temp, random_crop_paramy_temp): + + input_opt_batch, cloud_mask_batch = self.get_batch(list_IDs_temp, augment_rotation_param_temp, + augment_flip_param_temp, random_crop_paramx_temp, + random_crop_paramy_temp, data_type=3) + + input_sar_batch = self.get_batch(list_IDs_temp, augment_rotation_param_temp, augment_flip_param_temp, + random_crop_paramx_temp, random_crop_paramy_temp, data_type=1) + + if self.include_target: + output_opt_batch = self.get_batch(list_IDs_temp, augment_rotation_param_temp, augment_flip_param_temp, + random_crop_paramx_temp, random_crop_paramy_temp, data_type=2) + if self.use_cloud_mask > 0: + output_opt_cloud_batch = [np.append(output_opt_batch[sample], cloud_mask_batch[sample], axis=0) for + sample in range(len(output_opt_batch))] + output_opt_cloud_batch = np.asarray(output_opt_cloud_batch) + return ([input_opt_batch, input_sar_batch], [output_opt_cloud_batch]) + else: + return ([input_opt_batch, input_sar_batch], [output_opt_batch]) + elif not self.include_target: + # for prediction step where target is predicted + return ([input_opt_batch, input_sar_batch]) + + def get_image_data(self, paramx, paramy, path): + # with block not working with window kw + src = rasterio.open(path, 'r', driver='GTiff') + image = src.read(window=((paramx, paramx + self.crop_size), (paramy, paramy + self.crop_size))) + src.close() + image[np.isnan(image)] = np.nanmean(image) # fill holes and artifacts + return image + + def get_opt_image(self, path, paramx, paramy): + + image = self.get_image_data(paramx, paramy, path) + + return image.astype('float32') + + def get_sar_image(self, path, paramx, paramy): + + image = self.get_image_data(paramx, paramy, path) + + medianfilter_onsar = False + if medianfilter_onsar: + image[0] = scisig.medfilt2d(image[0], 7) + image[1] = scisig.medfilt2d(image[1], 7) + + return image.astype('float32') + + def get_data_image(self, ID, data_type, paramx, paramy): + + data_path = os.path.join(self.folder_input, ID[data_type], ID[4]).lstrip() + + if data_type == 2 or data_type == 3: + data_image = self.get_opt_image(data_path, paramx, paramy) + elif data_type == 1: + data_image = self.get_sar_image(data_path, paramx, paramy) + else: + print('Error! Data type invalid') + + return data_image + + def get_normalized_data(self, data_image, data_type): + + shift_data = False + + shift_values = [[0, 0], [1300., 981., 810., 380., 990., 2270., 2070., 2140., 2200., 650., 15., 1600., 680.], + [1545., 1212., 1012., 713., 1212., 2476., 2842., 2775., 3174., 546., 24., 1776., 813.]] + + # SAR + if data_type == 1: + for channel in range(len(data_image)): + data_image[channel] = np.clip(data_image[channel], self.clip_min[data_type - 1][channel], + self.clip_max[data_type - 1][channel]) + data_image[channel] -= self.clip_min[data_type - 1][channel] + data_image[channel] = self.max_val * (data_image[channel] / ( + self.clip_max[data_type - 1][channel] - self.clip_min[data_type - 1][channel])) + if shift_data: + data_image -= self.max_val / 2 + # OPT + elif data_type == 2 or data_type == 3: + for channel in range(len(data_image)): + data_image[channel] = np.clip(data_image[channel], self.clip_min[data_type - 1][channel], + self.clip_max[data_type - 1][channel]) + if shift_data: + data_image[channel] -= shift_values[data_type - 1][channel] + + data_image /= self.scale + + return data_image + + def get_batch(self, list_IDs_temp, augment_rotation_param_temp, augment_flip_param_temp, random_crop_paramx_temp, + random_crop_paramy_temp, data_type): + + if data_type == 1: + dim = self.input_dim[1] + else: + dim = self.input_dim[0] + + batch = np.empty((self.batch_size, *dim)).astype('float32') + cloud_mask_batch = np.empty((self.batch_size, self.input_dim[0][1], self.input_dim[0][2])).astype('float32') + + for i, ID in enumerate(list_IDs_temp): + + data_image = self.get_data_image(ID, data_type, random_crop_paramx_temp[i], random_crop_paramy_temp[i]) + if self.data_augmentation: + if not augment_flip_param_temp[i] == 0: + data_image = np.flip(data_image, augment_flip_param_temp[i]) + if not augment_rotation_param_temp[i] == 0: + data_image = np.rot90(data_image, augment_rotation_param_temp[i], (1, 2)) + + if data_type == 3 and self.use_cloud_mask: + cloud_mask = get_cloud_cloudshadow_mask(data_image, self.cloud_threshold) + cloud_mask[cloud_mask != 0] = 1 + cloud_mask_batch[i,] = cloud_mask + + data_image = self.get_normalized_data(data_image, data_type) + + batch[i,] = data_image + + cloud_mask_batch = cloud_mask_batch[:, np.newaxis, :, :] + + if data_type == 3: + return batch, cloud_mask_batch + else: + return batch diff --git a/Code/tools/feature_detectors.py b/Code/tools/feature_detectors.py new file mode 100644 index 0000000..9588e2f --- /dev/null +++ b/Code/tools/feature_detectors.py @@ -0,0 +1,97 @@ +import numpy as np +import scipy +import scipy.signal as scisig + + +# naming conventions: +# ['QA60', 'B1','B2', 'B3', 'B4', 'B5','B6','B7', 'B8',' B8A', 'B9', 'B10', 'B11','B12'] +# ['QA60','cb', 'blue', 'green', 'red', 're1','re2','re3','nir', 'nir2', 'waterVapor', 'cirrus','swir1', 'swir2']) +# [ 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]) #gdal +# [ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]) #numpy +# [ BB BG BR BNIR BSWIR1 BSWIR2 + +# ge. Bands 1, 2, 3, 8, 11, and 12 were utilized as BB , BG , BR , BNIR , BSWIR1 , and BSWIR2, respectively. + +def get_rescaled_data(data, limits): + return (data - limits[0]) / (limits[1] - limits[0]) + + +def get_normalized_difference(channel1, channel2): + subchan = channel1 - channel2 + sumchan = channel1 + channel2 + sumchan[sumchan == 0] = 0.001 # checking for 0 divisions + return subchan / sumchan + + +def get_shadow_mask(data_image): + # get data between 0 and 1 + data_image = data_image / 10000. + + (ch, r, c) = data_image.shape + shadow_mask = np.zeros((r, c)).astype('float32') + + BB = data_image[1] + BNIR = data_image[7] + BSWIR1 = data_image[11] + + CSI = (BNIR + BSWIR1) / 2. + + t3 = 3 / 4 + T3 = np.min(CSI) + t3 * (np.mean(CSI) - np.min(CSI)) + + t4 = 5 / 6 + T4 = np.min(BB) + t4 * (np.mean(BB) - np.min(BB)) + + shadow_tf = np.logical_and(CSI < T3, BB < T4) + + shadow_mask[shadow_tf] = -1 + shadow_mask = scisig.medfilt2d(shadow_mask, 5) + + return shadow_mask + + +def get_cloud_mask(data_image, cloud_threshold, binarize=False, use_moist_check=False): + data_image = data_image / 10000. + (ch, r, c) = data_image.shape + + # Cloud until proven otherwise + score = np.ones((r, c)).astype('float32') + # Clouds are reasonably bright in the blue and aerosol/cirrus bands. + score = np.minimum(score, get_rescaled_data(data_image[1], [0.1, 0.5])) + score = np.minimum(score, get_rescaled_data(data_image[0], [0.1, 0.3])) + score = np.minimum(score, get_rescaled_data((data_image[0] + data_image[10]), [0.15, 0.2])) + # Clouds are reasonably bright in all visible bands. + score = np.minimum(score, get_rescaled_data((data_image[3] + data_image[2] + data_image[1]), [0.2, 0.8])) + + if use_moist_check: + # Clouds are moist + ndmi = get_normalized_difference(data_image[7], data_image[11]) + score = np.minimum(score, get_rescaled_data(ndmi, [-0.1, 0.1])) + + # However, clouds are not snow. + ndsi = get_normalized_difference(data_image[2], data_image[11]) + score = np.minimum(score, get_rescaled_data(ndsi, [0.8, 0.6])) + + box_size = 7 + box = np.ones((box_size, box_size)) / (box_size ** 2) + score = scipy.ndimage.morphology.grey_closing(score, size=(5, 5)) + score = scisig.convolve2d(score, box, mode='same') + + score = np.clip(score, 0.00001, 1.0) + + if binarize: + score[score >= cloud_threshold] = 1 + score[score < cloud_threshold] = 0 + + return score + + +def get_cloud_cloudshadow_mask(data_image, cloud_threshold): + cloud_mask = get_cloud_mask(data_image, cloud_threshold, binarize=True) + shadow_mask = get_shadow_mask(data_image) + + cloud_cloudshadow_mask = np.zeros_like(cloud_mask) + cloud_cloudshadow_mask[shadow_mask < 0] = -1 + cloud_cloudshadow_mask[cloud_mask > 0] = 1 + + return cloud_cloudshadow_mask diff --git a/Code/tools/image_metrics.py b/Code/tools/image_metrics.py new file mode 100644 index 0000000..db96a38 --- /dev/null +++ b/Code/tools/image_metrics.py @@ -0,0 +1,141 @@ +import keras.backend as K +import tensorflow as tf + + +def cloud_mean_absolute_error(y_true, y_pred): + """Computes the MAE over the full image.""" + return K.mean(K.abs(y_pred[:, 0:13, :, :] - y_true[:, 0:13, :, :])) + + +def cloud_mean_squared_error(y_true, y_pred): + """Computes the MSE over the full image.""" + return K.mean(K.square(y_pred[:, 0:13, :, :] - y_true[:, 0:13, :, :])) + + +def cloud_root_mean_squared_error(y_true, y_pred): + """Computes the RMSE over the full image.""" + return K.sqrt(K.mean(K.square(y_pred[:, 0:13, :, :] - y_true[:, 0:13, :, :]))) + + +def cloud_bandwise_root_mean_squared_error(y_true, y_pred): + return K.mean(K.sqrt(K.mean(K.square(y_pred[:, 0:13, :, :] - y_true[:, 0:13, :, :]), axis=[2, 3]))) + + +def cloud_ssim(y_true, y_pred): + """Computes the SSIM over the full image.""" + y_true = y_true[:, 0:13, :, :] + y_pred = y_pred[:, 0:13, :, :] + + y_true *= 2000 + y_pred *= 2000 + + y_true = tf.transpose(y_true, [0, 2, 3, 1]) + y_pred = tf.transpose(y_pred, [0, 2, 3, 1]) + + ssim = tf.image.ssim(y_true, y_pred, max_val=10000.0) + ssim = tf.reduce_mean(ssim) + + return ssim + + +def get_sam(y_true, y_predict): + """Computes the SAM array.""" + mat = tf.multiply(y_true, y_predict) + mat = tf.reduce_sum(mat, 1) + mat = tf.div(mat, K.sqrt(tf.reduce_sum(tf.multiply(y_true, y_true), 1))) + mat = tf.div(mat, K.sqrt(tf.reduce_sum(tf.multiply(y_predict, y_predict), 1))) + mat = tf.acos(K.clip(mat, -1, 1)) + + return mat + + +def cloud_mean_sam(y_true, y_predict): + """Computes the SAM over the full image.""" + mat = get_sam(y_true[:, 0:13, :, :], y_predict[:, 0:13, :, :]) + + return tf.reduce_mean(mat) + + +def cloud_mean_sam_covered(y_true, y_pred): + """Computes the SAM over the covered image parts.""" + cloud_cloudshadow_mask = y_true[:, -1:, :, :] + target = y_true[:, 0:13, :, :] + predicted = y_pred[:, 0:13, :, :] + + if K.sum(cloud_cloudshadow_mask) == 0: + return 0.0 + + sam = get_sam(target, predicted) + sam = tf.expand_dims(sam, 1) + sam = K.sum(cloud_cloudshadow_mask * sam) / K.sum(cloud_cloudshadow_mask) + + return sam + + +def cloud_mean_sam_clear(y_true, y_pred): + """Computes the SAM over the clear image parts.""" + clearmask = K.ones_like(y_true[:, -1:, :, :]) - y_true[:, -1:, :, :] + predicted = y_pred[:, 0:13, :, :] + input_cloudy = y_pred[:, -14:-1, :, :] + + if K.sum(clearmask) == 0: + return 0.0 + + sam = get_sam(input_cloudy, predicted) + sam = tf.expand_dims(sam, 1) + sam = K.sum(clearmask * sam) / K.sum(clearmask) + + return sam + + +def cloud_psnr(y_true, y_predict): + """Computes the PSNR over the full image.""" + y_true *= 2000 + y_predict *= 2000 + rmse = K.sqrt(K.mean(K.square(y_predict[:, 0:13, :, :] - y_true[:, 0:13, :, :]))) + + return 20.0 * (K.log(10000.0 / rmse) / K.log(10.0)) + + +def cloud_mean_absolute_error_clear(y_true, y_pred): + """Computes the SAM over the clear image parts.""" + clearmask = K.ones_like(y_true[:, -1:, :, :]) - y_true[:, -1:, :, :] + predicted = y_pred[:, 0:13, :, :] + input_cloudy = y_pred[:, -14:-1, :, :] + + if K.sum(clearmask) == 0: + return 0.0 + + clti = clearmask * K.abs(predicted - input_cloudy) + clti = K.sum(clti) / (K.sum(clearmask) * 13) + + return clti + + +def cloud_mean_absolute_error_covered(y_true, y_pred): + """Computes the SAM over the covered image parts.""" + cloud_cloudshadow_mask = y_true[:, -1:, :, :] + predicted = y_pred[:, 0:13, :, :] + target = y_true[:, 0:13, :, :] + + if K.sum(cloud_cloudshadow_mask) == 0: + return 0.0 + + ccmaec = cloud_cloudshadow_mask * K.abs(predicted - target) + ccmaec = K.sum(ccmaec) / (K.sum(cloud_cloudshadow_mask) * 13) + + return ccmaec + + +def carl_error(y_true, y_pred): + """Computes the Cloud-Adaptive Regularized Loss (CARL)""" + cloud_cloudshadow_mask = y_true[:, -1:, :, :] + clearmask = K.ones_like(y_true[:, -1:, :, :]) - y_true[:, -1:, :, :] + predicted = y_pred[:, 0:13, :, :] + input_cloudy = y_pred[:, -14:-1, :, :] + target = y_true[:, 0:13, :, :] + + cscmae = K.mean(clearmask * K.abs(predicted - input_cloudy) + cloud_cloudshadow_mask * K.abs( + predicted - target)) + 1.0 * K.mean(K.abs(predicted - target)) + + return cscmae diff --git a/Code/tools/myCallbacks.py b/Code/tools/myCallbacks.py new file mode 100644 index 0000000..5aff130 --- /dev/null +++ b/Code/tools/myCallbacks.py @@ -0,0 +1,145 @@ +import csv +import time + +import keras.backend as K +import numpy as np +from keras.callbacks import Callback +from keras.callbacks import TensorBoard + + +class NBatchLogger(Callback): + """ + A Logger that log average performance per `display` steps. + """ + + def __init__(self, display, out_path, model_nr, initial_epoch, lr): + self.model_nr = model_nr + self.lr = lr + self.step = 0 + self.display = display + self.metric_cache = {} + self.epochnr = initial_epoch + self.out_path = out_path + self.step_info_filename = self.out_path + self.model_nr + '_steplossesinfo.txt' + open(self.step_info_filename, 'w').close() + self.switch = False + self.start = time.time() + self.end = time.time() + self.startbig = time.time() + + def on_epoch_end(self, epoch, logs=None): + self.epochnr += 1 + + def on_batch_end(self, batch, logs={}): + self.step += 1 + for k in self.params['metrics']: + if k in logs: + self.metric_cache[k] = self.metric_cache.get(k, 0) + logs[k] + if self.step % self.display == 0: + # print("finished batch") + + if self.switch == True: + # print("finished batch1") + self.end = time.time() + # print(self.end - self.start) + self.switch = False + else: + # print("finished batch2") + self.start = time.time() + self.switch = True + + if self.step == 50: + print("50: ", time.time() - self.startbig) + if self.step == 100: + print("100: ", time.time() - self.startbig) + if self.step == 150: + print("150: ", time.time() - self.startbig) + if self.step == 200: + print("200: ", time.time() - self.startbig) + + metrics_log = '' + for (k, v) in self.metric_cache.items(): + val = v / self.display + if abs(val) > 1e-3: + metrics_log += ' %.4f' % (val) + else: + metrics_log += ' %.4e' % (val) + + current_lr = K.get_value(self.model.optimizer.lr) + with open(self.step_info_filename, 'a') as self.step_info_file: + self.step_info_file.write('{} {} {} {} {} \n'.format(self.step, + self.params['steps'], self.epochnr, + metrics_log, current_lr)) + self.metric_cache.clear() + + +class CSV_NBatchLogger(Callback): + """ + A Logger that log average performance per `display` steps. + """ + + def __init__(self, display, out_path, model_nr, initial_epoch, separator=',', append=True): + # naming of output file + self.model_nr = model_nr + self.out_path = out_path + self.filename = self.out_path + self.model_nr + '_steplossesinfoCSV.csv' + + self.display = display # saving frequency in steps + self.epochnr = initial_epoch + self.append = append + + self.sep = separator + + self.file_flags = '' + self._open_args = {'newline': '\n'} + + self.step = 0 + self.metric_cache = {} + + open(self.filename, 'w').close() + + def on_epoch_end(self, epoch, logs=None): + self.epochnr += 1 + + def on_batch_end(self, batch, logs={}): + self.step += 1 + + if self.step == 1: + keys = [i for i in logs.keys()] + keys.append('lr') + with open(self.filename, 'w', newline='') as f: + write_outfile = csv.writer(f) + write_outfile.writerow(keys) + + values = [i for i in logs.values()] + values.append(K.get_value(self.model.optimizer.lr)) + with open(self.filename, 'a', newline='') as f: + write_outfile = csv.writer(f) + write_outfile.writerow(values) + + # restart accumulating losses + self.metric_cache.clear() + + +class TensorBoardWrapper(TensorBoard): + """Sets the self.validation_data property for use with TensorBoard callback.""" + + def __init__(self, batch_gen, nb_steps, input_dim, batch_nr, learning_phase, **kwargs): + super().__init__(**kwargs) + self.batch_gen = batch_gen # The generator. + self.nb_steps = nb_steps # Number of times to call next() on the generator. + self.input_dim = input_dim + self.batch_nr = batch_nr + self.learning_phase = learning_phase + + def on_epoch_end(self, epoch, logs): + + for i, data in enumerate(self.batch_gen): + (ib, tb) = data + + if self.learning_phase: + self.validation_data = [ib[0], ib[1], tb[0], np.ones((self.batch_nr,)), False] + else: + self.validation_data = [ib[0], ib[1], tb[0], np.ones((self.batch_nr,))] + + return super().on_epoch_end(epoch, logs) diff --git a/Data/datasetfilelist.csv b/Data/datasetfilelist.csv new file mode 100644 index 0000000..82ff4e0 --- /dev/null +++ b/Data/datasetfilelist.csv @@ -0,0 +1,9 @@ +1 s1 s2_cloudFree s2_cloudy ROIs1158_spring_101_0.tif +1 s1 s2_cloudFree s2_cloudy ROIs1158_spring_101_1.tif +1 s1 s2_cloudFree s2_cloudy ROIs1158_spring_101_10.tif +2 s1 s2_cloudFree s2_cloudy ROIs1158_spring_132_0.tif +2 s1 s2_cloudFree s2_cloudy ROIs1158_spring_132_1.tif +2 s1 s2_cloudFree s2_cloudy ROIs1158_spring_132_10.tif +3 s1 s2_cloudFree s2_cloudy ROIs1158_spring_106_0.tif +3 s1 s2_cloudFree s2_cloudy ROIs1158_spring_106_1.tif +3 s1 s2_cloudFree s2_cloudy ROIs1158_spring_106_10.tif \ No newline at end of file diff --git a/Docker/Dockerfile b/Docker/Dockerfile new file mode 100644 index 0000000..a876c88 --- /dev/null +++ b/Docker/Dockerfile @@ -0,0 +1,64 @@ +ARG cuda_version=9.0 +ARG cudnn_version=7 +FROM nvidia/cuda:${cuda_version}-cudnn${cudnn_version}-devel + +# Install system packages +RUN apt-get update && apt-get install -y --no-install-recommends \ + bzip2 \ + g++ \ + git \ + graphviz \ + libgl1-mesa-glx \ + libhdf5-dev \ + openmpi-bin \ + wget && \ + rm -rf /var/lib/apt/lists/* + +# Install conda +ENV CONDA_DIR /opt/conda +ENV PATH $CONDA_DIR/bin:$PATH + +RUN wget --quiet --no-check-certificate https://repo.continuum.io/miniconda/Miniconda3-4.2.12-Linux-x86_64.sh && \ + echo "c59b3dd3cad550ac7596e0d599b91e75d88826db132e4146030ef471bb434e9a *Miniconda3-4.2.12-Linux-x86_64.sh" | sha256sum -c - && \ + /bin/bash /Miniconda3-4.2.12-Linux-x86_64.sh -f -b -p $CONDA_DIR && \ + rm Miniconda3-4.2.12-Linux-x86_64.sh && \ + echo export PATH=$CONDA_DIR/bin:'$PATH' > /etc/profile.d/conda.sh + +# Install Python packages and keras +ENV NB_USER keras +ENV NB_UID 1000 + +RUN useradd -m -s /bin/bash -N -u $NB_UID $NB_USER && \ + chown $NB_USER $CONDA_DIR -R && \ + mkdir -p /src && \ + chown $NB_USER /src + +USER $NB_USER + +ARG python_version=3.7 + +RUN conda install -y python=${python_version} && \ + pip install --upgrade pip && \ + conda install \ + tensorflow-gpu=1.15.0 \ + keras=2.2.4 \ + bcolz \ + h5py \ + matplotlib=2.1.1 \ + mkl \ + nose \ + numpy=1.13.3 \ + scipy \ + rasterio \ + pydot \ + pygpu \ + pyyaml \ + six + +ENV PYTHONPATH='/src/:$PYTHONPATH' + +WORKDIR /src + +EXPOSE 8888 + +CMD bash diff --git a/README.md b/README.md new file mode 100644 index 0000000..bb97301 --- /dev/null +++ b/README.md @@ -0,0 +1,117 @@ +# Cloud removal in Sentinel-2 imagery using a deep residual neural network and SAR-optical data fusion +![Paper preview](doc/paper.JPG) +> Example results from the final setup of DSen2-CR using the CARL loss. Left is the input cloudy image, middle is the predicted image, left is the cloud-free target image. +> +This repository contains the code and models for the paper +> Meraner, A., Ebel, P., Zhu, X. X., & Schmitt, M. (2020). Cloud removal in Sentinel-2 imagery using a deep residual neural network and SAR-optical data fusion. ISPRS Journal of Photogrammetry and Remote Sensing, 166, 333-346. + +The paper can be accessed openly at [the ISPRS page](https://doi.org/10.1016/j.isprsjprs.2020.05.013). + +If you use this code, models or dataset for your research, please cite us accordingly: +```bibtex +@article{Meraner2020, +title = "Cloud removal in Sentinel-2 imagery using a deep residual neural network and SAR-optical data fusion", +journal = "ISPRS Journal of Photogrammetry and Remote Sensing", +volume = "166", +pages = "333 - 346", +year = "2020", +issn = "0924-2716", +doi = "https://doi.org/10.1016/j.isprsjprs.2020.05.013", +url = "http://www.sciencedirect.com/science/article/pii/S0924271620301398", +author = "Andrea Meraner and Patrick Ebel and Xiao Xiang Zhu and Michael Schmitt", +keywords = "Cloud removal, Optical imagery, SAR-optical, Data fusion, Deep learning, Residual network", +} +``` + +# Code +--- + +**NOTE** + +The code in this repository has been created in my early Python years and might not be the most elegant in some parts. I apologize for eventual issues or possible bugs. + +Should you notice something in the code, please feel free to create a Github issue (or, even better, a pull request :)), or let me know at the address [andrea.meraner@eumetsat.int](mailto:andrea.meraner@eumetsat.int) ! + +--- + +## Installation +The network is written in Keras with Tensorflow as backend. It is strongly advised to use GPU support to run the models. + +A conda environment with the required dependencies can be created with +```bash +conda create -n dsen2cr_env +conda activate dsen2cr_env +conda install -c conda-forge python=3.7 tensorflow=1.15.0 keras=2.2.4 numpy scipy rasterio pydot graphviz h5py +``` + +Alternatively, a Dockerfile is provided in `Docker/Dockerfile` which can be used to create a Docker image including CUDA. + +Note: +This code has been mainly written at the end of 2018/start of 2019 with the Python packages versions available at that time. A usage with updated packages might require some modification of the code. +If you try this code with updated libraries, please let me know your findings ([andrea.meraner@eumetsat.int](mailto:andrea.meraner@eumetsat.int)). + +To clone the repo: +```bash +git clone git@github.com:ameraner/dsen2-cr.git +cd dsen2-cr +``` + +## Usage +### Basic Commands +A new model can be trained from scratch by simply launching +```bash +cd Code/ +python dsen2cr_main.py +``` +The setup and hyperparameters can be tuned directly in the first lines of the main code. + +To resume the training from a previoulsy saved checkpoint, type +```bash +python dsen2cr_main.py --resume path/to/checkpoint.h5 +``` + +To predict images and evaluate the metrics of a trained network, do +```bash +python dsen2cr_main.py --predict path/to/checkpoint.h5 +``` + +### Dataset Paths +The main code will look for the paths to training/validation/test data in the csv file `Data/datasetfilelist.csv`. +An example is provided in the repository. The first column of each entry is an integer, where `1` defines a training sample, +`2` a validation sample, and `3` a test sample. The second, third, and fourth column indicate the subfolder names where the +Sentinel-1, Sentinel-2 Cloudfree, and Sentinel-2 Cloudy images are located respectively. The fifth column finally states the +filename of the image, that must be the same in the three folders. +The three subfolders must be located in the path defined by the variable `input_data_folder` in the main script. + +# Trained Model Checkpoints +The full DSen2-CR model trained by optimizing the CARL loss can be downloaded from Google Drive [here](https://drive.google.com/file/d/1L3YUVOnlg67H5VwlgYO9uC9iuNlq7VMg/view?usp=sharing). + +The full model trained on a plain L1 loss can be downloaded [here](https://drive.google.com/file/d/1zv4_91Yr2IYyYDoqhZw8KpnfvfLhkuBB/view?usp=sharing). The network trained on CARL but without SAR input +can be found [here](https://drive.google.com/file/d/1VHZa5-lX68mA2FbHeCiQsUq13oECw9DA/view?usp=sharing). The network trained without SAR, and on plain L1 loss, can be found [here](https://drive.google.com/file/d/11Th6UwKMXla7LGxsFJXwj-Jx9bSKlWUH/view?usp=sharing). + + +# Dataset +The dataset used in this work is called SEN12MS-CR. It is now publicly available for download [here](https://mediatum.ub.tum.de/1554803). +If you use this dataset for your research, please cite our related IEEE TGRS paper +> Ebel, P., Meraner, A., Schmitt, M., & Zhu, X. X. (2020). Multisensor Data Fusion for Cloud Removal in Global and All-Season Sentinel-2 Imagery. IEEE Transactions on Geoscience and Remote Sensing. + +describing the dataset release. The paper can be accessed +for free at [the IEEE explore page](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9211498). +```bibtex +@article{Ebel2020, + author={P. {Ebel} and A. {Meraner} and M. {Schmitt} and X. X. {Zhu}}, + journal={IEEE Transactions on Geoscience and Remote Sensing}, + title={Multisensor Data Fusion for Cloud Removal in Global and All-Season Sentinel-2 Imagery}, + year={2020}, + volume={}, + number={}, + pages={1-13}, + doi={10.1109/TGRS.2020.3024744}} +``` + +# Credits +Although now heavily modified and expanded, this code was originally based on the code by [Charis Lanaras](https://github.com/lanha) +available in [the DSen2 repo](https://github.com/lanha/DSen2). Also the network used by this work is, as the name suggests, +heavily based on the original DSen2 network (see [related paper](https://www.sciencedirect.com/science/article/abs/pii/S0924271618302636)). +I am grateful to the authors for making the original source code available. + diff --git a/doc/paper.JPG b/doc/paper.JPG new file mode 100644 index 0000000..5fdc7a8 Binary files /dev/null and b/doc/paper.JPG differ