diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index ad79729f..532eba56 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -3245,23 +3245,144 @@ def remove_extension(self, ext): return self - def convert_extensions( - self, df_map, extension_name, agg_func="sum", drop_not_bridged_index=True + def extension_convert( + self, + df_map, + extension_name, + extension_col_name="extension", + agg_func="sum", + drop_not_bridged_index=True, + unit_column_orig="unit_orig", + unit_column_new="unit_new", + ignore_columns=None, ): - """Builds a new extension based on conversion of existing ones + """Apply the convert function to all extensions + + Internally that calls the Extension.convert function for all extensions. + + See the Extension.convert function for more details. + TODO: put all details here + + + Parameters + ---------- + + df_map : pd.DataFrame + The DataFrame with the mapping of the old to the new classification. + This requires a specific structure: + + - Constraining data (e.g. stressors, regions, sectors) can be + either in the index or columns of df_orig. The need to have the same + name as the named index or column in df_orig. The algorithm searches + for matching data in df_orig based on all constraining columns in df_map. + + - Bridge columns are columns with '__' in the name. These are used to + map (bridge) some/all of the constraining columns in df_orig to the new + classification. + + - One column "factor", which gives the multiplication factor for the + conversion. If it is missing, it is set to 1. + + + This is better explained with an example. + Assuming a original dataframe df_orig with + index names 'stressor' and 'compartment' and column name 'region', + the characterizing dataframe could have the following structure (column names): + + stressor ... original index name + compartment ... original index name + region ... original column name + factor ... the factor for multiplication/characterization + If no factor is given, the factor is assumed to be 1. + This can be used, to simplify renaming/aggregation mappings. + impact__stressor ... the new index name, + replacing the previous index name "stressor". + Thus here "stressor" will be renamed to "impact", and the row index + will be renamed by the entries here. + compartment__compartment ... the new compartment, + replacing the original compartment. No rename of column happens here, + still row index will be renamed as given here. + + the columns with __ are called bridge columns, they are used + to match the original index. The new dataframe with have index names + based on the first part of the bridge column, in the order + in which the bridge columns are given in the mapping dataframe. - Calls convert function based on data given in df_map + "region" is constraining column, these can either be for the index or column + in df_orig. In case both exist, the one in index is preferred. + + extension_name: str + The name of the new extension returned - Difference to df_map: runs across all extensions. - Internally, this call extension_extract through all extensions - and then calls the convert function on the temporarily extracted - extension. + extension_col_name : str, optional + Name of the column specifying the extension name in df_map. + The entry in df_map here can either be the name returned by Extension.name or the + name of the Extension instance. + Default: 'extension' - Switch: also return the extracted raw_data + agg_func : str or func + the aggregation function to use for multiple matchings (summation by default) - df_map difference to other df_map: needs a column with the extension name + drop_not_bridged_index : bool, optional + What to do with index levels in df_orig not appearing in the bridge columns. + If True, drop them after aggregation across these, if False, + pass them through to the result. + + *Note:* Only index levels will be dropped, not columns. + + In case some index levels need to be dropped, and some not + make a bridge column for the ones to be dropped and map all to the same name. + Then drop this index level after the conversion. + + unit_column_orig : str, optional + Name of the column in df_map with the original unit. + This will be used to check if the unit matches the original unit in the extension. + Default is "unit_orig", if None, no check is performed. + + unit_column_new : str, optional + Name of the column in df_map with the new unit to be assigned to the new extension. + Default is "unit_new", if None same unit as in df_orig TODO EXPLAIN BETTER, THINK WARNING + + ignore_columns : list, optional + List of column names in df_map which should be ignored. + These could be columns with additional information, etc. + The unit columns given in unit_column_orig and unit_column_new + are ignored by default. + + + TODO: remove after explain + Extension for extensions: + extension ... extension name + unit_orig ... the original unit (optional, for double check with the unit) + unit_new ... the new unit to be set for the extension """ + if not ignore_columns: + ignore_columns = [] + ignore_columns.append(extension_col_name) + + extensions_to_consider = df_map.loc[:, extension_col_name].unique() + + gather = dict() + + for ext in extensions_to_consider: + gather.update( + self._apply_extension_method( + extensions=ext, + method="convert", + df_map=df_map[df_map[extension_col_name] == ext], + agg_func=agg_func, + extension_name=extension_name, + drop_not_bridged_index=drop_not_bridged_index, + unit_column_orig=unit_column_orig, + unit_column_new=unit_column_new, + ignore_columns=ignore_columns, + ) + ) + + result_ext = concate_extension(list(gather.values()), name=extension_name) + + return result_ext # look for extension name in df_map # make unique extension list, and call extension_extract for all diff --git a/tests/test_core.py b/tests/test_core.py index 9b250206..2fc1d373 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -554,6 +554,7 @@ def test_characterize_extension(fix_testmrio): def test_extension_convert(fix_testmrio): + """Testing the convert function within extensions""" tt_pre = fix_testmrio.testmrio.copy() df_map = pd.DataFrame( columns=[ @@ -638,6 +639,36 @@ def test_extension_convert(fix_testmrio): assert tt_post.post_calc.unit.loc["water_emissions", "unit"] == "g" +def test_extension_convert_full(fix_testmrio): + """Testing the convert function called from the full MRIO""" + + tt_pre = fix_testmrio.testmrio.copy() + + df_map = pd.DataFrame( + columns=[ + "extension", + "stressor", + "compartment", + "total__stressor", + "factor", + "unit_orig", + "unit_new", + ], + data=[ + ["Emissions", "emis.*", "air|water", "total_sum_tonnes", 1e-3, "kg", "t"], + ["emissions", "emission_type[1|2]", ".*", "total_sum", 1, "kg", "kg"], + ["emissions", "emission_type1", ".*", "air_emissions", 1e-3, "kg", "t"], + ["Emissions", "emission_type2", ".*", "water_emissions", 1000, "kg", "g"], + ], + ) + + x = tt_pre.extension_convert(df_map, extension_name="emissions_new_pre_calc") + # CONT: + # write test with units + # make a second extensions are check running over 2 + # cleanup docstrings and write docs + + def test_extension_convert_test_unit_fail(fix_testmrio): df_fail1 = pd.DataFrame( columns=[