diff --git a/CHANGELOG.rst b/CHANGELOG.rst index f8a6b4e2..44a0856e 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -2,6 +2,7 @@ Changelog ######### + v0.6dev ======= diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py index f499c6a7..526b48bc 100644 --- a/doc/source/notebooks/convert.py +++ b/doc/source/notebooks/convert.py @@ -269,7 +269,7 @@ # For that, we assume some land use results for different regions: # %% -land_use_result = pd.DataFrame( +land_use_data = pd.DataFrame( columns=["Region1", "Region2", "Region3"], index=[ "Wheat", @@ -288,9 +288,9 @@ [43, 17, 24], ], ) -land_use_result.index.names = ["stressor"] -land_use_result.columns.names = ["region"] -land_use_result +land_use_data.index.names = ["stressor"] +land_use_data.columns.names = ["region"] +land_use_data # %% [markdown] # Now we setup a pseudo characterization table for converting the land use data into @@ -342,8 +342,8 @@ # With that setup we can now characterize the land use data in land_use_result. # %% -biodiv_result = pymrio.convert(land_use_result, landuse_characterization) -biodiv_result +biodiv_characterised = pymrio.convert(land_use_data, landuse_characterization) +biodiv_characterised # %% [markdown] # Note, that in this example the region is not in the index @@ -354,25 +354,93 @@ # output. Thus the result is equivalent to # %% -land_use_result_stacked = land_use_result.stack(level="region") -land_use_result_stacked +land_use_data_stacked = land_use_data.stack(level="region") +land_use_data_stacked # %% -biodiv_result_stacked = pymrio.convert( - land_use_result_stacked, landuse_characterization, drop_not_bridged_index=False +biodiv_characterised_stacked = pymrio.convert( + land_use_data_stacked, landuse_characterization, drop_not_bridged_index=False ) -biodiv_result_stacked.unstack(level="region")[0] +biodiv_characterised_stacked.unstack(level="region")[0] # %% [markdown] # In this case we have to specify to not drop the not bridged "region" index. 
# We then unstack the result again, and have to select the first element ([0]), # since there where not other columns left after stacking them before the # characterization. -# -# CONT: start working on convert for extensions/mrio method # %% [markdown] +# TODO: section perhaps needed somewhere? # Irrespectively of the table or the mrio system, the convert function always follows the same pattern. # It requires a bridge table, which contains the mapping of the indices of the source data to the indices of the target data. # This bridge table has to follow a specific format, depending on the table to be converted. + +# %% [markdown] +## Converting pymrio Extensions + +# %% [markdown] +# The same principles as for individual tables can be used for converting full pymrio type Extensions (aka satellite accounts). +# In contrast to single tables, pymrio Extensions consist of several pandas DataFrames which can be converted in one go. +# Almost the same bridge table structure as for single tables can be used. The main additional information needed is in regard to +# units. Since pymrio Extensions include a unit dataframe, information about the unit names needs to be included. + +# %% [markdown] +# Extensions can be converted one at a time, but the main power of the method lies in collecting stressor data across different extensions +# and converting them in one go. + +# %% [markdown] +# We start with a simple example for converting a single extension of a pymrio MRIO system. +# To do so, we load the test MRIO system from pymrio. + +# %% +mrio = pymrio.load_test() + +# %% [markdown] +# Among others, this system has an extension "emissions" with industry and final demand emissions. + + +# %% +mrio.emissions.F + +# %% +mrio.emissions.F_Y + +# %% +mrio.emissions.unit + +# %% [markdown] +# We now set up a bridge table for converting/characterizing these emission data +# to several other accounts. 
+ # %% +emis_bridge = pd.DataFrame( + columns=[ + "stressor", + "compartment", + "total__stressor", + "factor", + "unit_orig", + "unit_new", + ], + data=[ + ["emis.*", "air|water", "total_sum_tonnes", 1e-3, "kg", "t"], + ["emission_type[1|2]", ".*", "total_sum", 1, "kg", "kg"], + ["emission_type1", ".*", "air_emissions", 1e-3, "kg", "t"], + ["emission_type2", ".*", "water_emissions", 1000, "kg", "g"], + ], +) +emis_bridge + +# %% [markdown] +# This is a fully made up example showing various capabilities of the method. +# In line +# - 0: find all stressors with emissions (emis.*) in either air or water (air|water) compartment, rename it to "total_sum_tonnes" (total__stressor) by multiplying with a factor 0.001 which converts the original unit "kg" to tonnes. +# - 1: find emission_type1 and 2, over all compartments and sum them together without any multiplication +# - 2: convert emissions of type 1 to air emissions in tonnes +# - 3: convert emissions of type 2 to water emissions in g + + +# %% +mrio.emissions.convert(emis_bridge, new_extension_name="abc").F + diff --git a/pymrio/core/mriosystem.py b/pymrio/core/mriosystem.py index e2128948..17b61ee0 100644 --- a/pymrio/core/mriosystem.py +++ b/pymrio/core/mriosystem.py @@ -2079,15 +2079,18 @@ def convert( unit = pd.DataFrame(columns=["unit"], index=new_extension.get_rows()) bridge_columns = [col for col in df_map.columns if "__" in col] unique_new_index = ( - df_map.loc[:, bridge_columns] - .drop_duplicates() + df_map + .drop_duplicates(subset=bridge_columns) + .loc[:, bridge_columns] .set_index(bridge_columns) .index ) unique_new_index.names = [col.split("__")[0] for col in bridge_columns] unit.unit = ( - df_map.set_index(bridge_columns) + df_map + .drop_duplicates(subset=bridge_columns) + .set_index(bridge_columns) .loc[unique_new_index] .loc[:, unit_column_new] ) diff --git a/tests/test_core.py b/tests/test_core.py index de69fe09..34bc6241 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -556,6 +556,7 
@@ def test_characterize_extension(fix_testmrio): def test_extension_convert(fix_testmrio): """Testing the convert function within extensions object""" tt_pre = fix_testmrio.testmrio.copy() + df_map = pd.DataFrame( columns=[ "stressor", @@ -570,13 +571,19 @@ def test_extension_convert(fix_testmrio): ["emission_type[1|2]", ".*", "total_sum", 1, "kg", "kg"], ["emission_type1", ".*", "air_emissions", 1e-3, "kg", "t"], ["emission_type2", ".*", "water_emissions", 1000, "kg", "g"], + ["emission_type1", ".*", "char_emissions", 2, "kg", "kg_eq"], + ["emission_type2", ".*", "char_emissions", 10, "kg", "kg_eq"], ], ) + tt_pre.pre_calc = tt_pre.emissions.convert( df_map, new_extension_name="emissions_new_pre_calc" ) + tt_pre.calc_all() + # CONT: continue writing tests for characterized "char_emissions" + pdt.assert_series_equal( tt_pre.emissions.D_cba.loc["emission_type1", "air"], tt_pre.pre_calc.D_cba.loc["air_emissions"] * 1000, @@ -603,6 +610,7 @@ def test_extension_convert(fix_testmrio): assert tt_pre.pre_calc.unit.loc["total_sum", "unit"] == "kg" assert tt_pre.pre_calc.unit.loc["air_emissions", "unit"] == "t" assert tt_pre.pre_calc.unit.loc["water_emissions", "unit"] == "g" + assert tt_pre.pre_calc.unit.loc["char_emissions", "unit"] == "kg_eq" tt_post = fix_testmrio.testmrio.copy() tt_post.calc_all()