From 7044a5ab763689d75d8de98367bc695ef584c8ed Mon Sep 17 00:00:00 2001 From: Stefano Piani Date: Tue, 10 Oct 2023 14:32:54 +0200 Subject: [PATCH 1/3] Added a new version of the multirun plotter This commit adds a new version of the profiles_plotter for the multiruns. This new version adds a few new features: the most noticeable is the possibility to add filters to data, for example computing the moving mean. Moreover, it is now also possible to define a different number of levels (so, for example, 3 or 5 instead of 4) and it is also possible to plot the average between multiple levels instead of a fixed one. Finally, it also introduces the possibility to compare more than 4 different plots together --- .../multirun/profiles_plotter/config.yaml | 35 ++ .../profiles_plotter/filters/__init__.py | 0 .../profiles_plotter/filters/filter.py | 24 ++ .../filters/moving_average_filter.py | 186 +++++++++ .../filters/read_filter_description.py | 45 +++ .../multirun/profiles_plotter/plotter.py | 332 ++++++++++++++++ .../profiles_plotter/tools/__init__.py | 0 .../profiles_plotter/tools/data_object.py | 143 +++++++ .../profiles_plotter/tools/read_config.py | 362 ++++++++++++++++++ 9 files changed, 1127 insertions(+) create mode 100644 validation/multirun/profiles_plotter/config.yaml create mode 100644 validation/multirun/profiles_plotter/filters/__init__.py create mode 100644 validation/multirun/profiles_plotter/filters/filter.py create mode 100644 validation/multirun/profiles_plotter/filters/moving_average_filter.py create mode 100644 validation/multirun/profiles_plotter/filters/read_filter_description.py create mode 100644 validation/multirun/profiles_plotter/plotter.py create mode 100644 validation/multirun/profiles_plotter/tools/__init__.py create mode 100644 validation/multirun/profiles_plotter/tools/data_object.py create mode 100644 validation/multirun/profiles_plotter/tools/read_config.py diff --git a/validation/multirun/profiles_plotter/config.yaml b/validation/multirun/profiles_plotter/config.yaml new file mode 100644 index 00000000..96300401 --- /dev/null +++ b/validation/multirun/profiles_plotter/config.yaml @@ -0,0 +1,35 @@ +sources: + enea_raw: + path: /media/internal/disk2TiB/data/FORCINGS_ENEA_RAW_CONVERTED/AVESCAN_OUTPUT/STAT_PROFILES_COMPACT + meshmask: /media/internal/disk2TiB/data/FORCINGS_ENEA_RAW_CONVERTED/meshmask_MITgcm.nc + enea_interpolated: + path: /somewhere/else + meshmask: /media/internal/disk2TiB/data/FORCINGS_ENEA_RAW_CONVERTED/meshmask_MITgcm.nc + +variable_selections: + v1: [P_l, N3n, N1p, N4n, N5s, P_c, O2o] + v2: [N1p, N3n, N4n, O2o, P_l, P_c, DIC, ppn, ALK, DIC, pH, pCO2, O3c, O3h] + var_enea: [THETA, SALT] + +levels: ["0", "0 - 20", "100", "150"] # in meters + +plots: + enea_original: + source: enea_raw + variables: var_enea + color: red + alpha_for_time_series: 0.2 + zorder: 1 + legend: enea + + enea_trend: + source: enea_raw + variables: var_enea + active: true + color: blue + alpha: 1 + zorder: 1 + filter: MovingAverage(10y) + draw_depth_profile: false + +output_dir: /dev/shm diff --git a/validation/multirun/profiles_plotter/filters/__init__.py b/validation/multirun/profiles_plotter/filters/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/validation/multirun/profiles_plotter/filters/filter.py b/validation/multirun/profiles_plotter/filters/filter.py new file mode 100644 index 00000000..055a3051 --- /dev/null +++ b/validation/multirun/profiles_plotter/filters/filter.py @@ -0,0 +1,24 @@ +from abc import ABC, abstractmethod +from typing import Union + +from tools.data_object import DataObject + + +class InvalidFilterDescription(Exception): + pass + + +class FilteredObject(DataObject, ABC): + def __init__(self, data_object: DataObject): + self._original_data = data_object + + +class Filter(ABC): + @abstractmethod + def get_filtered_object(self, data_object) -> FilteredObject: + raise NotImplementedError + + @staticmethod + @abstractmethod + def initialize_from_string(args_str: Union[str, None]): + raise NotImplementedError diff --git a/validation/multirun/profiles_plotter/filters/moving_average_filter.py b/validation/multirun/profiles_plotter/filters/moving_average_filter.py new file mode 100644 index 00000000..07f6e30e --- /dev/null +++ b/validation/multirun/profiles_plotter/filters/moving_average_filter.py @@ -0,0 +1,186 @@ +from datetime import timedelta +import re + +import numpy as np + +from filters.filter import Filter, FilteredObject +from tools.data_object import DataObject + + +class MovingAverageFilteredObject(FilteredObject): + def __init__(self, data_object: DataObject, window_size: int): + super().__init__(data_object) + + self._window_size = int(window_size) + + # How many points to we lose because of the average (the ones on the + # boundary of the time series) + lost_points = self._window_size - 1 + + original_times = self._original_data.get_time_steps() + + if self._window_size % 2 == 1: + lost_per_side = lost_points // 2 + self._time_steps = tuple( + original_times[lost_per_side: - lost_per_side] + ) + else: + lost_per_side = (lost_points - 1) // 2 + new_time_steps = [] + averaged_times = original_times[lost_per_side:- lost_per_side] + for i, t1, in enumerate(averaged_times[:-1]): + t2 = averaged_times[i + 1] + delta_time = t2 - t1 + new_time_steps.append(t1 + delta_time // 2) + self._time_steps = tuple(new_time_steps) + assert len(self._time_steps) == len(original_times) - lost_points + + def get_time_steps(self): + return self._time_steps + + def get_values(self, time_steps, basin, level_index, indicator, coasts=1): + fixed_axis = set() + if not isinstance(basin, slice): + fixed_axis.add('basin') + if not isinstance(level_index, slice): + fixed_axis.add('level') + if not isinstance(coasts, slice): + fixed_axis.add('coasts') + if not isinstance(indicator, slice): + fixed_axis.add('indicator') + + time_axis = self.get_axis('time', fixed_axis) + + if isinstance(time_steps, slice): + if time_steps.stop is None: + new_slice_stop = None + else: + if time_steps.stop < 0: + new_slice_stop = time_steps.stop + else: + new_slice_stop = time_steps.stop + self._window_size - 1 + time_slice = slice(time_steps.start, new_slice_stop) + else: + # Here we can suppose that time_steps is an integer + if time_steps >= 0: + time_slice_start = time_steps + time_slice_stop = time_steps + self._window_size + else: + time_slice_start = time_steps - self._window_size + time_slice_stop = time_steps + time_slice = slice(time_slice_start, time_slice_stop) + + with self._original_data: + original_data = self._original_data.get_values( + time_steps=time_slice, + basin=basin, + level_index=level_index, + indicator=indicator, + shores=coasts + ) + + if isinstance(time_steps, int): + if original_data.shape[time_axis] != self._window_size: + raise IndexError( + 'Index {} outside valid range'.format(time_steps) + ) + + return np.mean(original_data, axis=time_axis) + + all_slice_none = [ + slice(None) for _ in range(len(original_data.shape)) + ] + + time_index_geq_0 = all_slice_none.copy() + time_index_geq_0[time_axis] = slice(1, None) + time_index_geq_0 = tuple(time_index_geq_0) + + cut_first_time_indices = all_slice_none.copy() + cut_first_time_indices[time_axis] = slice(self._window_size - 1, None) + cut_first_time_indices = tuple(cut_first_time_indices) + + cut_last_time_indices = all_slice_none.copy() + cut_last_time_indices[time_axis] = slice(None, - self._window_size) + cut_last_time_indices = tuple(cut_last_time_indices) + + v1 = np.cumsum( + original_data, + axis=time_axis + )[cut_first_time_indices] + + v2 = np.zeros_like(v1) + v2[time_index_geq_0] = np.cumsum( + original_data, + axis=time_axis + )[cut_last_time_indices] + + moving_average = (v1 - v2) / self._window_size + + return moving_average + + +class MovingAverageFilter(Filter): + WINDOW_DESCRIPTION_MASK = re.compile( + r'^(?P\d+)\s{0,2}(?P(d|D|m|M|y|Y)?)\s*$' + ) + + def __init__(self, window_description: str): + mask_match = self.WINDOW_DESCRIPTION_MASK.match(window_description) + if mask_match is None: + raise ValueError( + 'The string that describes the window size of the moving ' + 'average filter must be an integer, optionally followed by one ' + 'of the following letters: d, m or y' + ) + self._window_size = int(mask_match.group('size')) + if mask_match.group('unit') is not None: + self._unit = mask_match.group('unit').lower() + else: + self._unit = None + + def get_filtered_object(self, data_object): + # If there are no units, we can simply return the filtered object as is + if self._unit is None: + return MovingAverageFilteredObject( + data_object, + window_size=self._windows_size + ) + + # Otherwise, we need to estimate the new size of the window + if self._unit == 'd': + unit_time = timedelta(days=1) + elif self._unit == 'm': + unit_time = timedelta(days=30) + else: + unit_time = timedelta(days=365) + + time_steps = data_object.get_time_steps() + time_index = 0 + for time_index, current_time in enumerate(time_steps): + if current_time - time_steps[0] >= unit_time: + break + + if time_index == 0: + raise ValueError('No time steps found in the data_object') + + window_size = self._window_size * time_index + return MovingAverageFilteredObject(data_object, window_size=window_size) + + @staticmethod + def initialize_from_string(args_str): + if args_str is None: + raise ValueError( + 'A moving average filter requires a mandatory argument that ' + 'must be the size of the average window' + ) + + args_split = args_str.split(',') + if len(args_split) > 1: + raise ValueError( + 'A moving average filter requires only one argument. ' + 'Received: {}'.format( + ', '.join([k.strip() for k in args_split]) + ) + ) + window_desc = args_split[0].strip() + return MovingAverageFilter(window_desc) diff --git a/validation/multirun/profiles_plotter/filters/read_filter_description.py b/validation/multirun/profiles_plotter/filters/read_filter_description.py new file mode 100644 index 00000000..1fb7509b --- /dev/null +++ b/validation/multirun/profiles_plotter/filters/read_filter_description.py @@ -0,0 +1,45 @@ +import re +from filters.filter import InvalidFilterDescription +from filters.moving_average_filter import MovingAverageFilter + + +FILTER_DESCRIPTION_MASK = re.compile( + r'^\s*(?P[a-zA-Z][a-zA-Z0-9_]*)((?P\(.*\))?)\s*$' +) + + +# Add here the name of the filter with its association every time you introduce +# a new filter +_FILTER_ASSOCIATION = { + 'movingaverage': MovingAverageFilter +} + + +def read_filter_description(filter_description: str): + mask_match = FILTER_DESCRIPTION_MASK.match(filter_description) + if mask_match is None: + raise InvalidFilterDescription( + 'Invalid filter description! A filter must satisfy the following ' + 'regular expression: {}\nReceived: "{}"'.format( + FILTER_DESCRIPTION_MASK, + filter_description + ) + ) + filter_name = mask_match.group('filter_name') + filter_args = mask_match.group('args') + + filter_name_lower = filter_name.lower() + if filter_name_lower in _FILTER_ASSOCIATION: + # Remove the parenthesis + if filter_args is not None: + filter_args = filter_args[1:-1] + return _FILTER_ASSOCIATION[filter_name_lower].initialize_from_string( + filter_args + ) + + raise InvalidFilterDescription( + 'Invalid filter specified: "{}"\nAllowed values are: {}'.format( + filter_name, + ', '.join(sorted(list(_FILTER_ASSOCIATION.keys()))) + ) + ) diff --git a/validation/multirun/profiles_plotter/plotter.py b/validation/multirun/profiles_plotter/plotter.py new file mode 100644 index 00000000..7a090ea9 --- /dev/null +++ b/validation/multirun/profiles_plotter/plotter.py @@ -0,0 +1,332 @@ +from os import path +from collections.abc import Iterable + +import matplotlib.pyplot as plt +import numpy as np + +from basins import V2 +from commons.mask import Mask +from tools.read_config import read_config_from_file + +try: + from mpi4py import MPI + comm = MPI.COMM_WORLD + rank = comm.Get_rank() + nranks = comm.size + isParallel = True +except ModuleNotFoundError: + rank = 0 + nranks = 1 + isParallel = False + + +MAIN_DIR = path.dirname(path.realpath(__file__)) +CONFIG_FILE = path.join(MAIN_DIR, 'config.yaml') + + +class PlotDrawer: + def __init__(self, plots: Iterable, variable, levels: Iterable, + meshmask_objects=None): + self._plots = tuple(plots) + self._levels = tuple(levels) + self._variable = variable + + if meshmask_objects is None: + self._meshmask_objects = {} + else: + self._meshmask_objects = meshmask_objects.copy() + + draw_depth_profile = False + draw_time_series = False + for plot in self._plots: + if variable not in plot.variables: + continue + if plot.draw_depth_profile: + draw_depth_profile = True + if plot.draw_time_series: + draw_time_series = True + + self._draw_depth_profile = draw_depth_profile + self._draw_time_series = draw_time_series + + if len(self._levels) == 0: + self._draw_time_series = False + + self._empty = not (self._draw_depth_profile or self._draw_time_series) + self._loaded_data = {} + + def is_empty(self): + return self._empty + + @property + def levels(self): + return self._levels + + def load_data(self): + self._loaded_data = {} + for plot in self._plots: + self._loaded_data[plot.name] = plot.get_plot_data(self._variable) + + def _get_plot_mask(self, plot): + if plot.source.meshmask in self._meshmask_objects: + return self._meshmask_objects[plot.source.meshmask] + return Mask(plot.source.meshmask, loadtmask=False) + + def _plot_time_series(self, axis_dict, basin_index: int, indicator=0): + for plot in self._plots: + if not plot.draw_time_series: + continue + + plot_meshmask = self._get_plot_mask(plot) + + if plot.name in self._loaded_data: + plot_data = self._loaded_data[plot.name] + else: + plot_data = plot.get_plot_data(self._variable) + + for i, level in enumerate(self._levels): + plot_x_data = plot_data.get_time_steps() + + # If level is a tuple, we have to compute the average between + # its first number and the second + if isinstance(level, tuple): + if len(level) != 2: + raise ValueError( + 'Levels must be a number of a tuple with two ' + 'elements (the start and the end of the interval ' + 'where an average will be computed). Submitted a ' + 'tuple with {} elements: {}'.format( + len(level), + level + ) + ) + if level[0] is None: + start_level_index = None + else: + start_level_index = plot_meshmask.getDepthIndex( + level[0] + ) + if level[-1] is None: + end_level_index = None + else: + end_level_index = plot_meshmask.getDepthIndex(level[-1]) + # This is because we want to include the level inside + # the average + end_level_index += 1 + + level_slice = slice(start_level_index, end_level_index) + + with plot_data: + y_data_domain = plot_data.get_values( + time_steps=slice(None), + basin=basin_index, + level_index=level_slice, + indicator=indicator # Zero is the mean + ) + plot_y_data = np.mean( + y_data_domain, + axis=plot_data.get_axis( + 'level', + while_fixing={'basin', 'coasts'} + ) + ) + else: + level_index = plot_meshmask.getDepthIndex(level) + with plot_data: + plot_y_data = plot_data.get_values( + time_steps=slice(None), + basin=basin_index, + level_index=level_index, + indicator=indicator # Zero is the mean + ) + + if np.all(np.isnan(plot_y_data)): + continue + + plot_kwargs = {} + for field in ('alpha', 'color', 'zorder'): + if getattr(plot, field) is not None: + plot_kwargs[field] = getattr(plot, field) + if plot.alpha_for_time_series is not None: + plot_kwargs['alpha'] = plot.alpha_for_time_series + + current_axis = axis_dict['L{}'.format(i)] + current_axis.plot( + plot_x_data, + plot_y_data, + label=plot.legend, + **plot_kwargs + ) + + # Add the title to each plot; we need to do this here so the limits + # of the plots are already defined + for i, level in enumerate(self._levels): + current_axis = axis_dict['L{}'.format(i)] + plot_left, plot_right = current_axis.get_xlim() + plot_bottom, plot_top = current_axis.get_ylim() + + axis_title = 'Depth: ' + if isinstance(level, tuple): + if level[0] is None: + start_average_str = '0m' + else: + start_average_str = '{}m'.format(level[0]) + if level[-1] is None: + end_average_str = 'bottom' + else: + end_average_str = '{}m'.format(level[-1]) + depth_str = 'from {} to {}'.format( + start_average_str, + end_average_str + ) + else: + depth_str = '{}m'.format(level) + axis_title += depth_str + + title_x_pos = plot_left + (plot_right - plot_left) * 0.03 + title_y_pos = plot_top - (plot_top - plot_bottom) * 0.05 + current_axis.text( + title_x_pos, + title_y_pos, + axis_title, + fontsize=10, + ha='left', + va='top' + ) + + def _plot_depth_profile(self, axis_dict, basin_index: int): + current_axis = axis_dict['P'] + + ytick_labels = (0, 200, 400, 600, 800, 1000) + current_axis.set_ylim([0, 1000]) + current_axis.set_yticks(ytick_labels) + current_axis.grid() + current_axis.invert_yaxis() + + for plot in self._plots: + if not plot.draw_depth_profile: + continue + + plot_meshmask = self._get_plot_mask(plot) + + if plot.name in self._loaded_data: + plot_data = self._loaded_data[plot.name] + else: + plot_data = plot.get_plot_data(self._variable) + + with plot_data: + plot_x_data = np.mean( + plot_data.get_values( + time_steps=slice(None), + basin=basin_index, + level_index=slice(None), + indicator=0 + ), + axis=0 + ) + plot_y_data = plot_meshmask.zlevels + + plot_kwargs = {} + for field in ('alpha', 'color', 'zorder'): + if getattr(plot, field) is not None: + plot_kwargs[field] = getattr(plot, field) + + current_axis.plot( + plot_x_data, + plot_y_data, + label=plot.legend, + **plot_kwargs + ) + current_axis.legend() + + def plot(self, basin_index, **fig_kw): + if self.is_empty(): + raise ValueError('Required a plot from an empty PlotDrawer') + + if not self._draw_time_series: + fig, axis = plt.subplot() + axis_dict = {'P': axis} + else: + plot_structure = [ + ['L{}'.format(i)] for i in range(len(self._levels)) + ] + if self._draw_depth_profile: + for plot_row in plot_structure: + plot_row.append('P') + fig, axis_dict = plt.subplot_mosaic( + plot_structure, + **fig_kw + ) + + # Share the x-axis between each L plot and the last one + last_l_axis = axis_dict['L{}'.format(len(self._levels) - 1)] + for i in range(len(self._levels[:-1])): + current_axis = axis_dict['L{}'.format(i)] + current_axis.sharex(last_l_axis) + current_axis.xaxis.set_tick_params( + which='both', + labelbottom=False, + labeltop=False + ) + current_axis.xaxis.offsetText.set_visible(False) + + if self._draw_time_series: + self._plot_time_series(axis_dict, basin_index) + if self._draw_depth_profile: + self._plot_depth_profile(axis_dict, basin_index) + + return fig + + +def main(): + config = read_config_from_file(CONFIG_FILE) + + # Create a list of all the variables (for all the plots) and of all the + # meshmasks + variables = set() + meshmask_objects = {} + + for plot in config.plots: + variables.update(plot.variables) + meshmask_path = path.realpath(plot.source.meshmask) + if meshmask_path not in meshmask_objects: + meshmask_objects[meshmask_path] = Mask( + meshmask_path, + loadtmask=False + ) + + # Now we sort the variables, so we ensure that the list is in the same order + # on all the processes + variables = tuple(sorted(list(variables))) + + # Here we produce the plots + for var_name in variables[rank::nranks]: + plot_drawer = PlotDrawer( + config.plots, + var_name, + config.levels, + meshmask_objects + ) + if plot_drawer.is_empty(): + continue + plot_drawer.load_data() + + for basin_index, basin in enumerate(V2.P): + outfile_name = 'Multirun_Profiles.{}.{}.png'.format( + var_name, + basin.name + ) + outfile_path = config.output_dir / outfile_name + + fig = plot_drawer.plot( + basin_index=basin_index, + figsize=(10, 10) + ) + fig.suptitle('{} {}'.format(var_name, basin.name)) + + fig.savefig(outfile_path) + plt.close(fig) + + +if __name__ == '__main__': + main() diff --git a/validation/multirun/profiles_plotter/tools/__init__.py b/validation/multirun/profiles_plotter/tools/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/validation/multirun/profiles_plotter/tools/data_object.py b/validation/multirun/profiles_plotter/tools/data_object.py new file mode 100644 index 00000000..5d9ea9d1 --- /dev/null +++ b/validation/multirun/profiles_plotter/tools/data_object.py @@ -0,0 +1,143 @@ +from abc import ABC, abstractmethod +from collections.abc import Iterable +from os import path +from typing import Union + +from timeseries.plot import read_pickle_file + + +SLICE_TYPE = Union[slice, int] + + +class InvalidAxisSpecified(Exception): + pass + + +class DataObject(ABC): + + @abstractmethod + def get_values(self, time_steps: SLICE_TYPE, basin: SLICE_TYPE, + level_index: SLICE_TYPE, indicator: SLICE_TYPE, + shores: SLICE_TYPE = 1): + raise NotImplementedError + + @abstractmethod + def get_time_steps(self): + raise NotImplementedError + + @staticmethod + def get_axis(axis_name: str, while_fixing: Union[Iterable, None] = None): + """ + Return the index of a specified axis identified by its name. + + :param axis_name: The name of the axis; one among "time", "basin", + "coasts", "level" and "indicator" + :param while_fixing: if the output has been previously sliced removing + some indices, it is possible to insert here the name of the axis that + have been sliced. In this way, the routine keeps track of the axes that + have been removed and rescales the final output in order of obtaining + the correct indices. So, for example, "level" has index 4; if "basin", + which has index 2, has been fixed, then level is rescaled to index 3. + + :return: the index of the axis described by its name + """ + axis_order = ('time', 'basin', 'coasts', 'level', 'indicator') + + if while_fixing is None: + while_fixing = set() + + for current_axis_name in while_fixing: + if current_axis_name not in axis_order: + raise InvalidAxisSpecified( + 'Invalid axis "{}" specified in the "with_fixed_axes" ' + 'argument. The only allowed values names are: {}'.format( + current_axis_name, + ', '.join(axis_order) + ) + ) + + if axis_name not in axis_order: + raise InvalidAxisSpecified( + 'Invalid axis "{}" specified. The only allowed values ' + 'are: '.format(axis_name, ', '.join(axis_order)) + ) + + axis_index = 0 + for name in axis_order: + if name == axis_name: + return axis_index + if name not in while_fixing: + axis_index += 1 + + raise Exception('Internal error; there is a bug in the code!') + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_val, exc_tb): + return + + +class PickleDataObject(DataObject): + BFMv5_dict = { + 'Ac': 'ALK', + 'ppn': 'netPPYc', + 'ppg': 'ruPPYc', + 'ppb': 'ruPBAc', + 'CaCO3flux_dic': 'rcalCARc' + } + + BFMv2_dict = { + 'ALK': 'Ac', + 'netPPYc': 'ppn', + 'netPPYc2': 'ppn', + 'ruPPYc': 'ppg', + 'ruPBAc': 'ppb', + 'rcalCARc': 'CaCO3flux_dic' + } + + def __init__(self, dir_path, var_name): + self.dir_path = dir_path + self.var_name = var_name + + self._loaded = False + self._data = None + self._time_steps = None + + def load(self): + if self._loaded: + return + + filename_candidates = [] + main_filename = path.join(self.dir_path, self.var_name + ".pkl") + filename_candidates.append(main_filename) + + data_class = self.__class__ + for alternative_names in (data_class.BFMv2_dict, data_class.BFMv5_dict): + if self.var_name in alternative_names: + new_var_name = alternative_names[self.var_name] + alternative_filename = path.join( + self.dir_path, + new_var_name + ".pkl" + ) + filename_candidates.append(alternative_filename) + for candidate in filename_candidates: + if path.exists(candidate): + final_filename = candidate + break + else: + raise IOError('File "{}" not found'.format(main_filename)) + + self._data, time_steps = read_pickle_file(final_filename) + self._time_steps = time_steps.Timelist + self._loaded = True + + def get_values(self, time_steps, basin, level_index, indicator, shores=1): + if not self._loaded: + self.load() + return self._data[time_steps, basin, shores, level_index, indicator] + + def get_time_steps(self): + if not self._loaded: + self.load() + return self._time_steps diff --git a/validation/multirun/profiles_plotter/tools/read_config.py b/validation/multirun/profiles_plotter/tools/read_config.py new file mode 100644 index 00000000..0b7897af --- /dev/null +++ b/validation/multirun/profiles_plotter/tools/read_config.py @@ -0,0 +1,362 @@ +from collections import namedtuple +from collections.abc import Callable, Iterable +import yaml +from pathlib import Path +from typing import Any + +from tools.data_object import PickleDataObject +from filters.read_filter_description import read_filter_description + + +EXPECTED_FIELDS = { + 'sources', + 'variable_selections', + 'plots', + 'levels', + 'output_dir' +} + + +class InvalidConfigFile(Exception): + pass + + +class InvalidPlotConfig(InvalidConfigFile): + pass + + +Config = namedtuple( + 'Config', + ('plots', 'levels', 'output_dir') +) + +Source = namedtuple('Source', ('path', 'meshmask')) + + +def read_number(number_str): + try: + return int(number_str) + except ValueError: + return float(number_str) + + +class PlotConfig: + CONFIG_FIELDS = { + 'source', + 'variables', + 'active', + 'color', + 'legend', + 'alpha', + 'alpha_for_time_series', + 'zorder', + 'filter', + 'draw_time_series', + 'draw_depth_profile' + } + + PLOT_NAMES = set() + + def __init__(self, name, source, variables, data_filter=None, active=True, + color=None, alpha=None, alpha_for_time_series=None, + legend=None, zorder=None, draw_time_series=True, + draw_depth_profile=True): + if name in self.PLOT_NAMES: + raise ValueError( + 'A plot named "{}" has already been created'.format(name) + ) + self.PLOT_NAMES.add(name) + + self.name = name + self._source = source + self.variables = variables + + self._active = active + self._filter = data_filter + + self._color = color + self._alpha = alpha + self._alpha_for_time_series = alpha_for_time_series + self._legend = legend + self._zorder = zorder + + self._draw_time_series = bool(draw_time_series) + self._draw_depth_profile = bool(draw_depth_profile) + + def get_plot_data(self, variable): + if variable not in self.variables: + raise ValueError( + 'Plot "{}" does not contain variable "{}"'.format( + self.name, + variable + ) + ) + + data_object = PickleDataObject(self.source.path, variable) + if self._filter is None: + return data_object + + return self._filter.get_filtered_object(data_object) + + @property + def source(self): + return self._source + + def is_active(self): + return self._active + + @property + def color(self): + return self._color + + @property + def alpha(self): + return self._alpha + + @property + def alpha_for_time_series(self): + return self._alpha_for_time_series + + @property + def legend(self): + return self.name if self._legend is None else self._legend + + @property + def zorder(self): + return self._zorder + + @property + def draw_time_series(self): + return self._draw_time_series + + @property + def draw_depth_profile(self): + return self._draw_depth_profile + + @staticmethod + def read_plot_config(plot_name, plot_config, sources, variable_selections): + if not isinstance(plot_config, dict): + raise InvalidPlotConfig( + 'plot_config must be a dictionary that describe a plot.' + 'Received: {}'.format(str(plot_config)) + ) + + for field in plot_config: + if field not in PlotConfig.CONFIG_FIELDS: + raise InvalidPlotConfig( + 'Field "{}" is not an admissible field for a ' + 'plot_config'.format(field) + ) + + if 'source' not in plot_config: + raise InvalidPlotConfig('Mandatory field "source" is missing') + source_name = plot_config['source'] + + if source_name not in sources: + raise InvalidPlotConfig( + 'Plot "{}" uses source "{}", that has not been defined'.format( + plot_name, + source_name + ) + ) + plot_source = sources[source_name] + + if 'variables' not in plot_config: + raise InvalidPlotConfig('Mandatory field "variables" is missing') + variable_selection_name = plot_config['variables'] + if variable_selection_name not in variable_selections: + raise InvalidPlotConfig( + 'Plot "{}" uses "{}" variable selection, that has not been ' + 'defined'.format(plot_name, variable_selection_name) + ) + variables = variable_selections[variable_selection_name] + + plot_config_kwargs = {} + + def read_boolean(field_name): + def bool_casting(x_val): + if x_val is None: + x_val = False + if x_val is not True and x_val is not False: + raise InvalidPlotConfig( + 'The field "{}" must contain a boolean value: ' + 'received {}'.format(field_name, x_val) + ) + return x_val + return bool_casting + + def add_to_kwargs(field_name, casting: Callable[[str], Any] = str): + if field_name in plot_config: + plot_config_kwargs[field_name] = casting( + plot_config[field_name] + ) + + add_to_kwargs('color') + add_to_kwargs('legend') + add_to_kwargs('zorder', int) + add_to_kwargs('alpha', float) + add_to_kwargs('alpha_for_time_series', float) + add_to_kwargs('active', read_boolean('active')) + add_to_kwargs('draw_depth_profile', read_boolean('draw_depth_profile')) + add_to_kwargs('draw_time_series', read_boolean('draw_time_series')) + + plot_filter = None + if 'filter' in plot_config: + f_description = plot_config['filter'] + if f_description is not None and f_description is not False: + plot_filter = read_filter_description(f_description) + + return PlotConfig( + plot_name, + plot_source, + variables, + data_filter=plot_filter, + **plot_config_kwargs + ) + + +def read_config_from_file(config_path): + with open(config_path, 'r') as f: + current_config = read_config(f) + return current_config + + +def read_config(config_datastream): + yaml_content = yaml.safe_load(config_datastream) + + for field in yaml_content: + if field not in EXPECTED_FIELDS: + raise InvalidConfigFile( + 'Invalid field "{}" found inside config file. Admissible ' + 'fields are: {}'.format(field, ', '.join(EXPECTED_FIELDS)) + ) + + if 'sources' not in yaml_content: + raise InvalidConfigFile( + 'Field "sources" not found inside the config file' + ) + + # Read the sources + sources = {} + sources_raw = yaml_content['sources'] + if not isinstance(sources_raw, dict): + raise InvalidConfigFile( + 'Field "sources" must be a dictionary that associates a ' + 'name to a path' + ) + for source_name, source_config in sources_raw.items(): + if not isinstance(source_config, dict): + raise InvalidConfigFile( + 'A source must be associated to a dictionary that describes ' + 'its path and its meshmask. Invalid field for source {}: ' + '{}'.format(source_name, source_config) + ) + for field in source_config: + if field not in ('path', 'meshmask'): + raise InvalidConfigFile( + 'Invalid field "{}" for source "{}"'.format( + source_name, + field + ) + ) + if 'path' not in source_config: + raise InvalidConfigFile( + 'No field "path" specified for source {}'.format(source_name) + ) + if 'meshmask' not in source_config: + raise InvalidConfigFile( + 'No field "meshmask" specified for source {}'.format(source_name) + ) + source_path = Path(str(source_config['path'])) + source_meshmask = Path(str(source_config['meshmask'])) + sources[str(source_name)] = Source(source_path, source_meshmask) + + if 'variable_selections' not in yaml_content: + raise InvalidConfigFile( + 'Field "variable_selections" not found inside the config file' + ) + + # Read the selections of the variables + variable_selections = {} + variable_selections_raw = yaml_content['variable_selections'] + if not isinstance(variable_selections_raw, dict): + raise InvalidConfigFile( + 'Field "variable_selections" must be a dictionary that associates ' + 'a name to a list of biogeochemical variables' + ) + for selection_name, selection_vars in variable_selections_raw.items(): + if not isinstance(selection_vars, Iterable): + raise InvalidConfigFile( + 'Content of variable selection "{}" is not a valid ' + 'list: {}'.format(selection_name, str(selection_vars)) + ) + variable_selections[str(selection_name)] = \ + tuple(str(i) for i in selection_vars) + + # Read levels, ensuring that they are a list of integers or floats + if 'levels' not in yaml_content: + raise InvalidConfigFile( + 'Field "levels" not found inside the config file' + ) + levels_raw = yaml_content['levels'] + levels = [] + if not isinstance(levels_raw, Iterable): + raise InvalidConfigFile( + 'Field "levels" must be a list of levels (in meters)' + ) + for raw_level in levels_raw: + if isinstance(raw_level, str) and '-' in raw_level: + level_split = raw_level.split('-') + if len(level_split) > 2: + raise InvalidConfigFile( + 'Invalid level found: {}'.format(raw_level) + ) + level_start_raw = level_split[0].strip() + level_end_raw = level_split[1].strip() + if level_start_raw == '': + level_start = None + else: + level_start = read_number(level_start_raw) + if level_end_raw == '': + level_end = None + else: + level_end = read_number(level_end_raw) + levels.append((level_start, level_end)) + continue + levels.append(read_number(raw_level)) + levels = tuple(levels) + + # Read the output dir + if 'output_dir' not in yaml_content: + raise InvalidConfigFile( + 'Field "output_dir" not found inside the config file' + ) + output_dir = Path(str(yaml_content['output_dir'])) + + # Finally, the most complicated part; reading the plots + if 'plots' not in yaml_content: + raise InvalidConfigFile( + 'Field "plots" not found inside the config file' + ) + plots = [] + plots_raw = yaml_content['plots'] + if not isinstance(plots_raw, dict): + raise InvalidConfigFile( + 'Fields "plots" must be a dictionary tat associates to a plot name' + 'its configuration' + ) + for plot_name, plot_config in plots_raw.items(): + current_plot = PlotConfig.read_plot_config( + plot_name, + plot_config, + sources, + variable_selections + ) + plots.append(current_plot) + plots = tuple(p for p in plots if p.is_active()) + + return Config( + plots=plots, + levels=levels, + output_dir=output_dir + ) From 592e7910656ce674c3cd0f9da71eb17b3e97a8f5 Mon Sep 17 00:00:00 2001 From: Stefano Piani Date: Wed, 11 Oct 2023 12:36:58 +0200 Subject: [PATCH 2/3] MultirunProfilePlotter: add possibility of using a different config file --- .../multirun/profiles_plotter/plotter.py | 29 +++++++++++++++++-- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/validation/multirun/profiles_plotter/plotter.py b/validation/multirun/profiles_plotter/plotter.py index 7a090ea9..e15c9364 100644 --- a/validation/multirun/profiles_plotter/plotter.py +++ b/validation/multirun/profiles_plotter/plotter.py @@ -1,3 +1,4 @@ +from argparse import ArgumentParser from os import path from collections.abc import Iterable @@ -110,8 +111,8 @@ def _plot_time_series(self, axis_dict, basin_index: int, indicator=0): end_level_index = None else: end_level_index = plot_meshmask.getDepthIndex(level[-1]) - # This is because we want to include the level inside - # the average + # This is because we want to include the last level + # inside the average end_level_index += 1 level_slice = slice(start_level_index, end_level_index) @@ -278,8 +279,30 @@ def plot(self, basin_index, **fig_kw): return fig +def configure_argparse(): + parser = ArgumentParser() + parser.description = ( + "The MultirunProfilePlotter is a script to plot temporal series and " + "depth profiles of several different runs of one or more models. " + "You may configure the behaviour of the script by setting the values " + "of the config.yaml file in the main directory of this script or " + "writing another configuration file yourself." + ) + parser.add_argument( + 'config_file', + type=str, + nargs='?', + default=CONFIG_FILE, + help='The path of the config file used by this script. By default, it ' + 'uses {}'.format(CONFIG_FILE) + ) + return parser.parse_args() + + def main(): - config = read_config_from_file(CONFIG_FILE) + args = configure_argparse() + + config = read_config_from_file(args.config_file) # Create a list of all the variables (for all the plots) and of all the # meshmasks From 8dc94a17861f4d876aef3970a1e63a2324dbb01e Mon Sep 17 00:00:00 2001 From: Stefano Piani Date: Thu, 12 Oct 2023 14:51:38 +0200 Subject: [PATCH 3/3] MultirunProfilePlotter: fixing average among levels This commit fixes a bug in the previous version of the script: the average among the depth levels where computing without weights. Now the script computes the volume of each level and applies these volumes as weights while computingthe average --- .../multirun/profiles_plotter/plotter.py | 186 ++++++++++++++---- 1 file changed, 148 insertions(+), 38 deletions(-) diff --git a/validation/multirun/profiles_plotter/plotter.py b/validation/multirun/profiles_plotter/plotter.py index e15c9364..18f43995 100644 --- a/validation/multirun/profiles_plotter/plotter.py +++ b/validation/multirun/profiles_plotter/plotter.py @@ -1,12 +1,14 @@ from argparse import ArgumentParser from os import path +from collections import namedtuple from collections.abc import Iterable import matplotlib.pyplot as plt import numpy as np -from basins import V2 +from basins import V2, Basin from commons.mask import Mask +from commons.submask import SubMask from tools.read_config import read_config_from_file try: @@ -25,9 +27,16 @@ CONFIG_FILE = path.join(MAIN_DIR, 'config.yaml') +# A BasinSliceVolume describes a slices among vertical levels of a specific +# basin, i.e., for example, all the cells between 10m and 30m in the Adriatic +# Sea (accordingly to a specific meshmask). The levels field should be a tuple +# with two elements: the start and the end of the interval in meters. +BasinSlice = namedtuple('BasinSlice', ['basin_index', 'meshmask', 'levels']) + + class PlotDrawer: def __init__(self, plots: Iterable, variable, levels: Iterable, - meshmask_objects=None): + meshmask_objects=None, average_volume_weights=None): self._plots = tuple(plots) self._levels = tuple(levels) self._variable = variable @@ -36,6 +45,10 @@ def __init__(self, plots: Iterable, variable, levels: Iterable, self._meshmask_objects = {} else: self._meshmask_objects = meshmask_objects.copy() + if average_volume_weights is None: + self._average_volume_weights = {} + else: + self._average_volume_weights = average_volume_weights draw_depth_profile = False draw_time_series = False @@ -69,11 +82,13 @@ def load_data(self): self._loaded_data[plot.name] = plot.get_plot_data(self._variable) def _get_plot_mask(self, plot): - if plot.source.meshmask in self._meshmask_objects: - return self._meshmask_objects[plot.source.meshmask] - return Mask(plot.source.meshmask, loadtmask=False) + real_path = path.realpath(plot.source.meshmask) + if real_path in self._meshmask_objects: + return self._meshmask_objects[real_path] + return Mask(plot.source.meshmask, loadtmask=True) - def _plot_time_series(self, axis_dict, basin_index: int, indicator=0): + def _plot_time_series(self, axis_dict, basin_index: int, basin, + indicator=0): for plot in self._plots: if not plot.draw_time_series: continue @@ -91,31 +106,29 @@ def _plot_time_series(self, axis_dict, basin_index: int, indicator=0): # If level is a tuple, we have to compute the average between # its first number and the second if isinstance(level, tuple): - if len(level) != 2: - raise ValueError( - 'Levels must be a number of a tuple with two ' - 'elements (the start and the end of the interval ' - 'where an average will be computed). Submitted a ' - 'tuple with {} elements: {}'.format( - len(level), - level - ) - ) - if level[0] is None: - start_level_index = None + # Let us check if we already have the weights for this + # average + requested_slice = BasinSlice( + basin_index, + path.realpath(plot.source.meshmask), + level + ) + + level_slice = from_interval_to_slice( + level, + plot_meshmask + ) + + if requested_slice in self._average_volume_weights: + average_weights = \ + self._average_volume_weights[requested_slice] else: - start_level_index = plot_meshmask.getDepthIndex( - level[0] + submask = SubMask(basin, maskobject=plot_meshmask) + average_weights = compute_slice_volume( + level_slice, + plot_meshmask, + submask ) - if level[-1] is None: - end_level_index = None - else: - end_level_index = plot_meshmask.getDepthIndex(level[-1]) - # This is because we want to include the last level - # inside the average - end_level_index += 1 - - level_slice = slice(start_level_index, end_level_index) with plot_data: y_data_domain = plot_data.get_values( @@ -124,12 +137,13 @@ def _plot_time_series(self, axis_dict, basin_index: int, indicator=0): level_index=level_slice, indicator=indicator # Zero is the mean ) - plot_y_data = np.mean( + plot_y_data = np.average( y_data_domain, axis=plot_data.get_axis( 'level', while_fixing={'basin', 'coasts'} - ) + ), + weights=average_weights ) else: level_index = plot_meshmask.getDepthIndex(level) @@ -240,12 +254,12 @@ def _plot_depth_profile(self, axis_dict, basin_index: int): ) current_axis.legend() - def plot(self, basin_index, **fig_kw): + def plot(self, basin_index, basin, **fig_kw): if self.is_empty(): raise ValueError('Required a plot from an empty PlotDrawer') if not self._draw_time_series: - fig, axis = plt.subplot() + fig, axis = plt.subplot(**fig_kw) axis_dict = {'P': axis} else: plot_structure = [ @@ -269,10 +283,13 @@ def plot(self, basin_index, **fig_kw): labelbottom=False, labeltop=False ) - current_axis.xaxis.offsetText.set_visible(False) + # If there are more than 3 time series, hide all the x-ticks + # on each plot beside the bottom one + if len(self._levels[:-1]) > 3: + current_axis.xaxis.offsetText.set_visible(False) if self._draw_time_series: - self._plot_time_series(axis_dict, basin_index) + self._plot_time_series(axis_dict, basin_index, basin) if self._draw_depth_profile: self._plot_depth_profile(axis_dict, basin_index) @@ -299,25 +316,116 @@ def configure_argparse(): return parser.parse_args() +def from_interval_to_slice(level_interval: tuple, meshmask: Mask): + """ + Given an interval in meters, return the slice that identifies that interval + as z-levels of the model. + + :param level_interval: a tuple with two numbers (in meters) + :param meshmask: the meshmask of the model + :return: a slice that, when applied to the levels of the model, returns the + cells whose depth is among the values specified in level_interval + """ + if len(level_interval) != 2: + raise ValueError( + 'Levels must be a number of a tuple with two ' + 'elements (the start and the end of the interval ' + 'where an average will be computed). Submitted a ' + 'tuple with {} elements: {}'.format( + len(level_interval), + level_interval + ) + ) + if level_interval[0] is None: + start_level_index = None + else: + start_level_index = meshmask.getDepthIndex( + level_interval[0] + ) + if level_interval[-1] is None: + end_level_index = None + else: + end_level_index = meshmask.getDepthIndex(level_interval[-1]) + # This is because we want to include the last level + # inside the average + end_level_index += 1 + + level_slice = slice(start_level_index, end_level_index) + return level_slice + + +def compute_slice_volume(level_slice: slice, meshmask: Mask, + basin_submask: SubMask) -> np.ndarray: + """ + Compute the volume of each level for a specific mask and basin + + :param level_slice: Level slice is a slice that selects the levels where we + want to compute the volumes. This is a slice for the levels, it is not in + meters + :param meshmask: The meshmask of the model + :param basin_submask: The submask of the basin + :return: + A 1D array `v` such that `v[i]` is the volume of the i-th level of the slice + """ + e1t = meshmask.e1t + e2t = meshmask.e2t + e3t = meshmask.e3t[level_slice, :] + + slice_volume = np.sum( + e1t * e2t * e3t, + axis=(1, 2), + where=basin_submask.mask[level_slice, :] + ) + return slice_volume + + def main(): args = configure_argparse() config = read_config_from_file(args.config_file) + # Check if at least one of the specified levels requires + averages_in_levels = False + for level in config.levels: + if isinstance(level, tuple): + averages_in_levels = True + break + # Create a list of all the variables (for all the plots) and of all the # meshmasks variables = set() meshmask_objects = {} - for plot in config.plots: variables.update(plot.variables) meshmask_path = path.realpath(plot.source.meshmask) + # Here we read in advance the meshmask objects, so that we can share + # the information among different plots (if they use the same meshmask). + # If we need to compute the volumes of the levels (because there are + # some averages that must be computed), we also read the tmask if meshmask_path not in meshmask_objects: meshmask_objects[meshmask_path] = Mask( meshmask_path, - loadtmask=False + loadtmask=averages_in_levels ) + # And now we prepare the weights for the levels averages + average_volume_weights = {} + if averages_in_levels: + for meshmask_path, meshmask_object in meshmask_objects.items(): + for basin_index, basin in enumerate(V2.P): + submask = SubMask(basin, maskobject=meshmask_object) + for level in config.levels: + if not isinstance(level, tuple): + continue + level_slice = from_interval_to_slice(level, meshmask_object) + selection_weights = compute_slice_volume( + level_slice, + meshmask_object, + submask + ) + basin_slice = BasinSlice(basin_index, meshmask_path, level) + average_volume_weights[basin_slice] = selection_weights + # Now we sort the variables, so we ensure that the list is in the same order # on all the processes variables = tuple(sorted(list(variables))) @@ -328,7 +436,8 @@ def main(): config.plots, var_name, config.levels, - meshmask_objects + meshmask_objects, + average_volume_weights ) if plot_drawer.is_empty(): continue @@ -343,6 +452,7 @@ def main(): fig = plot_drawer.plot( basin_index=basin_index, + basin=basin, figsize=(10, 10) ) fig.suptitle('{} {}'.format(var_name, basin.name))