diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py
index d8f992c6..20cc89ab 100644
--- a/doc/source/notebooks/convert.py
+++ b/doc/source/notebooks/convert.py
@@ -19,7 +19,7 @@
 # Pymrio contains several possibilities to convert data from one system to another.
 
 # %% [markdown]
-# The term *convert* is meant very general here, it contains 
+# The term *convert* is meant very generally here; it covers
 # - finding and extracting data based on indicies across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification
 # - converting the names of the found indicies
 # - adjusting the numerical values of the data, e.g. for unit conversion or characterisation
@@ -33,7 +33,7 @@
 
 
 # %% [markdown]
-# Irrespectively of the table or the mrio system, the convert function always follows the same pattern. 
+# Irrespective of the table or the mrio system, the convert function always follows the same pattern.
 # It requires a bridge table, which contains the mapping of the indicies of the source data to the indicies of the target data.
 # This bridge table has to follow a specific format, depending on the table to be converted.
@@ -62,5 +62,3 @@
 
 # %% [markdown]
 # ## Converting a pymrio extension
-
-
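The bridge table described in the notebook above is just a plain DataFrame: one column per original index level used for matching, one "new__orig" bridge column per level to be renamed, and a "factor" column. A rough rename-only sketch (the frame to_char, the bridge rename_bridge and the single-level columns are illustrative; they mirror the test data added to tests/test_util.py further down in this diff):

    import pandas as pd

    from pymrio.tools.ioutil import convert

    # source table: two index levels, "em_type" and "compart"
    to_char = pd.DataFrame(
        data=99.0,
        index=pd.MultiIndex.from_product(
            [["em1", "em2", "emA"], ["air", "water"]], names=["em_type", "compart"]
        ),
        columns=["reg1", "reg2"],
    )

    # bridge table: "em_type" matches the original entries, "stressor__em_type"
    # renames the level to "stressor" and maps each entry to its new name;
    # factor 1 leaves the values unchanged
    rename_bridge = pd.DataFrame(
        columns=["em_type", "stressor__em_type", "factor"],
        data=[
            ["em1", "emission-1", 1],
            ["em2", "emission2", 1],
            ["emA", "emission A", 1],
        ],
    )

    renamed = convert(to_char, rename_bridge, drop_not_bridged=False)

With drop_not_bridged=False the untouched "compart" level is kept (moved to the end of the index); with the default True it is aggregated away.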
diff --git a/doc/source/notebooks/load_save_export.py b/doc/source/notebooks/load_save_export.py
index 593c892c..10f9b8c0 100644
--- a/doc/source/notebooks/load_save_export.py
+++ b/doc/source/notebooks/load_save_export.py
@@ -21,9 +21,10 @@
 # %% [markdown]
 # Here, we use the included small test MRIO system to highlight the different function. The same functions are available for any MRIO loaded into pymrio. Expect, however, significantly decreased performance due to the size of real MRIO system.
+import os
+
 # %%
 import pymrio
-import os
 
 io = pymrio.load_test().calc_all()
diff --git a/doc/source/notebooks/stressor_characterization.py b/doc/source/notebooks/stressor_characterization.py
index b94f6e17..fb0c55fe 100644
--- a/doc/source/notebooks/stressor_characterization.py
+++ b/doc/source/notebooks/stressor_characterization.py
@@ -36,7 +36,6 @@
 import pandas as pd
-
 import pymrio
 from pymrio.core.constants import PYMRIO_PATH  # noqa
diff --git a/pymrio/tools/iodownloader.py b/pymrio/tools/iodownloader.py
index db1108ae..3cc1a81d 100644
--- a/pymrio/tools/iodownloader.py
+++ b/pymrio/tools/iodownloader.py
@@ -1,5 +1,4 @@
-""" Utility functions for automatic downloading of public MRIO databases
-"""
+"""Utility functions for automatic downloading of public MRIO databases"""
 
 import getpass
 import itertools
diff --git a/pymrio/tools/iomath.py b/pymrio/tools/iomath.py
index 323b0fd8..f410dbab 100644
--- a/pymrio/tools/iomath.py
+++ b/pymrio/tools/iomath.py
@@ -1,4 +1,4 @@
-""" Mathematical functions for input output calculations
+"""Mathematical functions for input output calculations
 
 All methods here should follow the functional programming paradigm
diff --git a/pymrio/tools/iometadata.py b/pymrio/tools/iometadata.py
index acd3a606..8957eb29 100644
--- a/pymrio/tools/iometadata.py
+++ b/pymrio/tools/iometadata.py
@@ -1,5 +1,4 @@
-""" Meta data for provenance and version tracking in pymrio
-"""
+"""Meta data for provenance and version tracking in pymrio"""
 
 import datetime
 import getpass
diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py
index 07ee6ff3..2bea3d5a 100644
--- a/pymrio/tools/ioutil.py
+++ b/pymrio/tools/ioutil.py
@@ -999,13 +999,13 @@ def _index_regex_matcher(_dfs_idx, _method, _find_all=None, **kwargs):
 
 
 def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
-    """ Convert a DataFrame to a new classification
+    """Convert a DataFrame to a new classification
 
     Parameters
     ----------
     df_orig : pd.DataFrame
         The DataFrame to process. All matching occurs on the index.
-        Thus stack the tables if necessary.
+        Stack tables if necessary.
 
     df_map : pd.DataFrame
         The DataFrame with the mapping of the old to the new classification.
@@ -1014,17 +1014,24 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
         and one column for each new index level in the characterized result dataframe.
         This is better explained with an example.
 
-        Assuming a dataframe with index names 'stressor' and 'compartment'
-        the characterizing dataframe would have the following structure:
+        Assuming an original dataframe df_orig with
+        index names 'stressor' and 'compartment',
+        the characterizing dataframe would have the following structure (column names):
 
         stressor ... original index name
         compartment ... original index name
-        factor ... the factor for multiplication
-        impact__stressor ... the new index name, replacing the previous index name "stressor"
-        compartment__compartment ... the new compartment, replacing the original compartment
-
-        the columsn with __ we call bridge columns, they are used to match the original index
-        the new dataframe with have index names based on the first part of the bridge column, in the order
+        factor ... the factor for multiplication/characterization
+        impact__stressor ... the new index name,
+            replacing the previous index name "stressor".
+            Thus here "stressor" will be renamed to "impact", and the row index
+            will be renamed by the entries here.
+        compartment__compartment ... the new compartment,
+            replacing the original compartment. The index level name stays
+            "compartment", but the index entries are still renamed as given here.
+
+        The columns with __ are called bridge columns; they are used
+        to match the original index. The new dataframe will have index names
+        based on the first part of the bridge column, in the order
         in which the bridge columns are given in the mapping dataframe.
 
         The structure "stressor" and "impact__stressor" is important.
@@ -1049,30 +1056,10 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
 
     """
-    # df_orig = pd.DataFrame(
-    #     data=5,
-    #     index=pd.MultiIndex.from_product([["em1", "em2"], ["air", "water"]]),
-    #     columns=pd.MultiIndex.from_product([["r1", "c1"], ["r2", "c2"]]),
-    # )
-    # df_orig.columns.names = ["reg", "sec"]
-    # df_orig.index.names = ["em_type", "compart"]
-    #
-    # df_map = pd.DataFrame(
-    #     columns=["em_type", "compart", "total__em_type", "factor"],
-    #     data=[
-    #         ["em.*", "air|water", "total_regex", 2],
-    #         ["em1", "air", "total_sum", 2],
-    #         ["em1", "water", "total_sum", 2],
-    #         ["em2", "air", "total_sum", 2],
-    #         ["em2", "water", "total_sum", 2],
-    #         ["em1", "air", "all_air", 0.5],
-    #         ["em2", "air", "all_air", 0.5],
-    #     ],
-    # )
-    #
-
     bridge_columns = [col for col in df_map.columns if "__" in col]
-    unique_new_index = df_map.loc[:, bridge_columns].drop_duplicates().set_index(bridge_columns).index
+    unique_new_index = (
+        df_map.loc[:, bridge_columns].drop_duplicates().set_index(bridge_columns).index
+    )
 
     bridge_components = namedtuple("bridge_components", ["new", "orig", "raw"])
     bridges = []
@@ -1086,10 +1073,14 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
         else:
             raise ValueError(f"Column {col} contains more then one '__'")
         assert bridge.orig in df_map.columns, f"Column {bridge.new} not in df_map"
-        assert bridge.orig in df_orig.index.names, f"Column {bridge.orig} not in df_orig"
+        assert (
+            bridge.orig in df_orig.index.names
+        ), f"Column {bridge.orig} not in df_orig"
 
         bridges.append(bridge)
 
-    orig_index_not_bridged = [ix for ix in df_orig.index.names if ix not in [b.orig for b in bridges]]
+    orig_index_not_bridged = [
+        ix for ix in df_orig.index.names if ix not in [b.orig for b in bridges]
+    ]
 
     df_map = df_map.set_index(bridge_columns)
     res_collector = []
@@ -1125,7 +1116,7 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
         )
 
         # CONT: test cases for wrong input
-        # CONT: test cases for just rename
+        # CONT: docs for just rename (see tests already done)
        # CONT: docs with test cases
         res_collector.append(
             df_collected.groupby(by=df_collected.index.names).agg(agg_func)
@@ -1137,7 +1128,9 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
         all_result = all_result.reset_index(level=orig_index_not_bridged, drop=True)
     else:
         # move the not bridged index levels to the end of the index
-        new_index = [ix for ix in all_result.index.names if ix not in orig_index_not_bridged]
+        new_index = [
+            ix for ix in all_result.index.names if ix not in orig_index_not_bridged
+        ]
         all_result = all_result.reorder_levels(new_index + orig_index_not_bridged)
 
     agg_all = all_result.groupby(by=all_result.index.names).agg(agg_func)
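To make the characterization case from the docstring above concrete, a small sketch following the documented column layout (the impact names, factors, values and region columns are invented for illustration; only the "stressor"/"compartment" levels and the "impact__stressor" / "compartment__compartment" bridge columns follow the docstring):

    import pandas as pd

    from pymrio.tools.ioutil import convert

    emissions = pd.DataFrame(
        data=5.0,
        index=pd.MultiIndex.from_product(
            [["em1", "em2"], ["air", "water"]], names=["stressor", "compartment"]
        ),
        columns=["reg1", "reg2"],
    )

    # one row per match: "factor" scales the values, "impact__stressor" renames
    # the "stressor" level to "impact", "compartment__compartment" keeps the
    # level name but still maps the entries
    charact = pd.DataFrame(
        columns=[
            "stressor",
            "compartment",
            "impact__stressor",
            "compartment__compartment",
            "factor",
        ],
        data=[
            ["em1", "air", "impact A", "air", 2.0],
            ["em2", "air", "impact A", "air", 0.5],
            ["em1", "water", "impact B", "water", 1.0],
        ],
    )

    impacts = convert(emissions, charact)  # duplicates aggregated with agg_func="sum"
    # ("impact A", "air"):   2.0 * 5 + 0.5 * 5 = 12.5 per column
    # ("impact B", "water"): 1.0 * 5 = 5.0 per column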
"Development Status :: 4 - Beta", diff --git a/tests/test_aggregation.py b/tests/test_aggregation.py index c0cafd41..5d252184 100644 --- a/tests/test_aggregation.py +++ b/tests/test_aggregation.py @@ -1,4 +1,4 @@ -""" Tests the aggregation functionality in pymrio +"""Tests the aggregation functionality in pymrio This only test the top-level aggregation function. For the low-level function 'build_agg_vec' and 'build_agg_matrix' @@ -211,7 +211,9 @@ def test_wrong_inputs(): with pytest.raises(ValueError) as VA_region_name: reg_agg = range(len(io.get_regions())) _ = io.aggregate( - region_agg=reg_agg, region_names=["a", "b"], inplace=False # noqa + region_agg=reg_agg, + region_names=["a", "b"], + inplace=False, # noqa ) assert "region aggregation" in str(VA_region_name.value).lower() diff --git a/tests/test_core.py b/tests/test_core.py index 4f3e2d13..91e69677 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,5 +1,4 @@ -""" Testing core functionality of pymrio -""" +"""Testing core functionality of pymrio""" import os import sys diff --git a/tests/test_integration.py b/tests/test_integration.py index 03d6fbdf..7bf4c587 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -1,4 +1,4 @@ -""" Testing functions for the full run based on +"""Testing functions for the full run based on the small MRIO given within pymrio. This tests the full computation and fileio. diff --git a/tests/test_math.py b/tests/test_math.py index 3b54cdbe..3f072637 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -1,4 +1,4 @@ -""" test cases for all mathematical functions """ +"""test cases for all mathematical functions""" import os import sys diff --git a/tests/test_outputs.py b/tests/test_outputs.py index 9b18702b..f8bc3099 100644 --- a/tests/test_outputs.py +++ b/tests/test_outputs.py @@ -1,16 +1,16 @@ -""" Test for producing graphical outputs +"""Test for producing graphical outputs - The report functionality is tested separately - in test_integration +The report functionality is tested separately +in test_integration - Note - ---- +Note +---- - Here we use the values returned from the plotted graph - for testing. Regression tests against plotted graphs, - as provided by image_comparison decorator of matplotlib, - are not used since this is deprecated and also not consistent - across different plotting engines. +Here we use the values returned from the plotted graph +for testing. Regression tests against plotted graphs, +as provided by image_comparison decorator of matplotlib, +are not used since this is deprecated and also not consistent +across different plotting engines. 
""" diff --git a/tests/test_parsers.py b/tests/test_parsers.py index 29c9a087..74bd1100 100644 --- a/tests/test_parsers.py +++ b/tests/test_parsers.py @@ -1,4 +1,4 @@ -""" Tests the parsing of different MRIOs """ +"""Tests the parsing of different MRIOs""" import os import sys diff --git a/tests/test_util.py b/tests/test_util.py index 128198d6..9e955e52 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -1,4 +1,4 @@ -""" test cases for all util functions """ +"""test cases for all util functions""" import os import string @@ -20,13 +20,13 @@ from pymrio.tools.ioutil import ( # noqa build_agg_matrix, build_agg_vec, + convert, diagonalize_blocks, filename_from_url, find_first_number, index_contains, index_fullmatch, index_match, - convert, set_block, sniff_csv_format, ) @@ -355,34 +355,68 @@ def test_util_regex(): assert len(df_none_match_index) == 0 - def test_convert_rename(): - """ Testing the renaming of one table""" + """Testing the renaming of one table""" to_char = pd.DataFrame( - data=5, + data=99.0, index=pd.MultiIndex.from_product([["em1", "em2", "emA"], ["air", "water"]]), columns=pd.MultiIndex.from_product([["r1", "c1"], ["r2", "c2"]]), ) + to_char.columns.names = ["reg", "sec"] to_char.index.names = ["em_type", "compart"] - - rename_bridge = pd.DataFrame( - columns=["em_type", "compart", "stressor__em_type", "factor"], + rename_bridge_simple = pd.DataFrame( + columns=["em_type", "stressor__em_type", "factor"], data=[ - ["em1", "air|water", "emission-1", 2], - # ["em1", "air", "total_sum", 2], - # ["em1", "water", "total_sum", 2], - # ["em2", "air", "total_sum", 2], - # ["em2", "water", "total_sum", 2], - # ["em1", "air", "all_air", 0.5], - # ["em2", "air", "all_air", 0.5], + ["em1", "emission-1", 1], + ["em2", "emission2", 1], + ["emA", "emission A", 1], ], ) - char_res = convert(to_char, rename_bridge) + char_res_keep_comp = convert(to_char, rename_bridge_simple, drop_not_bridged=False) + assert all(char_res_keep_comp.columns == to_char.columns) + assert all( + char_res_keep_comp.index.get_level_values("compart") + == to_char.index.get_level_values("compart") + ) + npt.assert_allclose(char_res_keep_comp.values, to_char.values) + + pdt.assert_index_equal( + char_res_keep_comp.index.get_level_values("stressor"), + pd.Index( + [ + "emission A", + "emission A", + "emission-1", + "emission-1", + "emission2", + "emission2", + ], + dtype="object", + name="stressor", + ), + ) + char_res_agg_comp = convert(to_char, rename_bridge_simple, drop_not_bridged=True) + + assert all(char_res_agg_comp.columns == to_char.columns) + assert char_res_agg_comp.sum().sum() == to_char.sum().sum() + + pdt.assert_index_equal( + char_res_agg_comp.index, + pd.Index( + [ + "emission A", + "emission-1", + "emission2", + ], + dtype="object", + name="stressor", + ), + ) def test_convert_characterize(): @@ -439,20 +473,26 @@ def test_convert_characterize(): exp_res1B = pd.DataFrame( columns=to_char.columns, index=pd.MultiIndex.from_tuples( - [("all_air", "air"), ("total_regex", "air"), ("total_regex", "water"), ("total_sum", "air"), ("total_sum", "water")]), + [ + ("all_air", "air"), + ("total_regex", "air"), + ("total_regex", "water"), + ("total_sum", "air"), + ("total_sum", "water"), + ] + ), data=[ [5, 5, 5, 5], [20, 20, 20, 20], [20, 20, 20, 20], [20, 20, 20, 20], [20, 20, 20, 20], - ] + ], ) exp_res1B = exp_res1B.astype(float) exp_res1B.index.names = res1B.index.names - pdt.assert_frame_equal(res1B, exp_res1B) - + pdt.assert_frame_equal(res1B, exp_res1B) # TEST2 with impact per compartment (two 
index levels in the result)
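A quick cross-check of the numbers asserted in exp_res1B above: if test_convert_characterize uses the same data as the example removed from the ioutil.py comment block earlier in this diff (every cell equal to 5, factor 2 for the total_sum and total_regex rows, factor 0.5 for the all_air rows), then per compartment total_sum = total_regex = 2*5 + 2*5 = 20 and all_air = 0.5*5 + 0.5*5 = 5, which matches the single row of 5s and the four rows of 20s expected here (assuming res1B was built with drop_not_bridged=False, since the "compart" level is still present in the expected index).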