Skip to content

Commit

Permalink
fixed issue with duplicates in unit setting for characterization
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Oct 26, 2024
1 parent ea0717e commit 3f24b03
Show file tree
Hide file tree
Showing 4 changed files with 96 additions and 16 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
Changelog
#########


v0.6dev
=======

Expand Down
94 changes: 81 additions & 13 deletions doc/source/notebooks/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -269,7 +269,7 @@
# For that, we assume some land use results for different regions:

# %%
land_use_result = pd.DataFrame(
land_use_data = pd.DataFrame(
columns=["Region1", "Region2", "Region3"],
index=[
"Wheat",
Expand All @@ -288,9 +288,9 @@
[43, 17, 24],
],
)
land_use_result.index.names = ["stressor"]
land_use_result.columns.names = ["region"]
land_use_result
land_use_data.index.names = ["stressor"]
land_use_data.columns.names = ["region"]
land_use_data

# %% [markdown]
# Now we set up a pseudo characterization table for converting the land use data into
Expand Down Expand Up @@ -342,8 +342,8 @@
# With that setup we can now characterize the land use data in land_use_data.

# %%
biodiv_result = pymrio.convert(land_use_result, landuse_characterization)
biodiv_result
biodiv_characterised = pymrio.convert(land_use_data, landuse_characterization)
biodiv_characterised

# %% [markdown]
# Note that in this example the region is not in the index
Expand All @@ -354,25 +354,93 @@
# output. Thus the result is equivalent to

# %%
land_use_result_stacked = land_use_result.stack(level="region")
land_use_result_stacked
land_use_data_stacked = land_use_data.stack(level="region")
land_use_data_stacked

# %%
biodiv_result_stacked = pymrio.convert(
land_use_result_stacked, landuse_characterization, drop_not_bridged_index=False
biodiv_characterised_stacked = pymrio.convert(
land_use_data_stacked, landuse_characterization, drop_not_bridged_index=False
)
biodiv_result_stacked.unstack(level="region")[0]
biodiv_characterised_stacked.unstack(level="region")[0]

# %% [markdown]
# In this case we have to specify that the non-bridged "region" index must not be dropped.
# We then unstack the result again, and have to select the first element ([0]),
# since there were no other columns left after stacking them before the
# characterization.
#
# CONT: start working on convert for extensions/mrio method


# %% [markdown]
# TODO: section perhaps needed somewhere?
# Irrespective of the table or the mrio system, the convert function always follows the same pattern.
# It requires a bridge table, which contains the mapping of the indices of the source data to the indices of the target data.
# This bridge table has to follow a specific format, depending on the table to be converted.

# %% [markdown]
# ## Converting pymrio Extensions

# %% [markdown]
# The same principles as for individual tables can be used for converting full pymrio type Extensions (aka satellite accounts).
# In contrast to single tables, pymrio Extensions consist of several pandas DataFrames which can be converted in one go.
# Almost the same bridge table structure as for single tables can be used. The main additional information needed concerns
# units: since pymrio Extensions include a unit dataframe, information about the unit names needs to be included.

# %% [markdown]
# Extensions can be converted one at a time, but the main power of the method lies in collecting stressor data across different extensions
# and converting them in one go.

# %% [markdown]
# We start with a simple example for converting a single extension of a pymrio MRIO system.
# To do so, we load the test MRIO system from pymrio.

# %%
mrio = pymrio.load_test()

# %% [markdown]
# Among others, this system has an extension "emissions" with industry and final demand emissions.


# %%
mrio.emissions.F

# %%
mrio.emissions.F_Y

# %%
mrio.emissions.unit

# %% [markdown]
# We now set up a bridge table for converting/characterizing these emission data
# to several other accounts.

# %%
emis_bridge = pd.DataFrame(
columns=[
"stressor",
"compartment",
"total__stressor",
"factor",
"unit_orig",
"unit_new",
],
data=[
["emis.*", "air|water", "total_sum_tonnes", 1e-3, "kg", "t"],
["emission_type[1|2]", ".*", "total_sum", 1, "kg", "kg"],
["emission_type1", ".*", "air_emissions", 1e-3, "kg", "t"],
["emission_type2", ".*", "water_emissions", 1000, "kg", "g"],
],
)
emis_bridge

# %% [markdown]
# This is a fully made up example showing various capabilities of the method.
# In row
# - 0: find all stressors with emissions (emis.*) in either the air or water (air|water) compartment, rename them to "total_sum_tonnes" (total__stressor) and multiply by a factor of 0.001, which converts the original unit "kg" to tonnes.
# - 1: find emission_type1 and 2 over all compartments and sum them together without any multiplication
# - 2: convert emissions of type 1 to air emissions in tonnes
# - 3: convert emissions of type 2 to water emissions in g


# %%
mrio.emissions.convert(emis_bridge, new_extension_name="abc").F

9 changes: 6 additions & 3 deletions pymrio/core/mriosystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -2079,15 +2079,18 @@ def convert(
unit = pd.DataFrame(columns=["unit"], index=new_extension.get_rows())
bridge_columns = [col for col in df_map.columns if "__" in col]
unique_new_index = (
df_map.loc[:, bridge_columns]
.drop_duplicates()
df_map
.drop_duplicates(subset=bridge_columns)
.loc[:, bridge_columns]
.set_index(bridge_columns)
.index
)
unique_new_index.names = [col.split("__")[0] for col in bridge_columns]

unit.unit = (
df_map.set_index(bridge_columns)
df_map
.drop_duplicates(subset=bridge_columns)
.set_index(bridge_columns)
.loc[unique_new_index]
.loc[:, unit_column_new]
)
Expand Down
8 changes: 8 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,6 +556,7 @@ def test_characterize_extension(fix_testmrio):
def test_extension_convert(fix_testmrio):
"""Testing the convert function within extensions object"""
tt_pre = fix_testmrio.testmrio.copy()

df_map = pd.DataFrame(
columns=[
"stressor",
Expand All @@ -570,13 +571,19 @@ def test_extension_convert(fix_testmrio):
["emission_type[1|2]", ".*", "total_sum", 1, "kg", "kg"],
["emission_type1", ".*", "air_emissions", 1e-3, "kg", "t"],
["emission_type2", ".*", "water_emissions", 1000, "kg", "g"],
["emission_type1", ".*", "char_emissions", 2, "kg", "kg_eq"],
["emission_type2", ".*", "char_emissions", 10, "kg", "kg_eq"],
],
)

tt_pre.pre_calc = tt_pre.emissions.convert(
df_map, new_extension_name="emissions_new_pre_calc"
)

tt_pre.calc_all()

# CONT: continue writing tests for characterized "char_emissions"

pdt.assert_series_equal(
tt_pre.emissions.D_cba.loc["emission_type1", "air"],
tt_pre.pre_calc.D_cba.loc["air_emissions"] * 1000,
Expand All @@ -603,6 +610,7 @@ def test_extension_convert(fix_testmrio):
assert tt_pre.pre_calc.unit.loc["total_sum", "unit"] == "kg"
assert tt_pre.pre_calc.unit.loc["air_emissions", "unit"] == "t"
assert tt_pre.pre_calc.unit.loc["water_emissions", "unit"] == "g"
assert tt_pre.pre_calc.unit.loc["char_emissions", "unit"] == "kg_eq"

tt_post = fix_testmrio.testmrio.copy()
tt_post.calc_all()
Expand Down

0 comments on commit 3f24b03

Please sign in to comment.