Skip to content

Commit

Permalink
all extension convert implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Aug 15, 2024
1 parent 6ae4ba2 commit 6c131d8
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 10 deletions.
141 changes: 131 additions & 10 deletions pymrio/core/mriosystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -3245,23 +3245,144 @@ def remove_extension(self, ext):

return self

def convert_extensions(
self, df_map, extension_name, agg_func="sum", drop_not_bridged_index=True
def extension_convert(
    self,
    df_map,
    extension_name,
    extension_col_name="extension",
    agg_func="sum",
    drop_not_bridged_index=True,
    unit_column_orig="unit_orig",
    unit_column_new="unit_new",
    ignore_columns=None,
):
    """Build a new extension based on the conversion of existing ones.

    Applies the convert function across all extensions named in df_map.
    For every unique entry in the ``extension_col_name`` column of df_map,
    the ``convert`` method of that extension is called with only the rows
    of df_map belonging to it. The per-extension results are then
    concatenated into one new extension.

    See the Extension.convert function for more details on the conversion.

    Parameters
    ----------
    df_map : pd.DataFrame
        The DataFrame with the mapping of the old to the new classification.
        In contrast to the df_map passed to Extension.convert, it needs an
        additional column with the extension name (see extension_col_name).

        This requires a specific structure:

        - Constraining data (e.g. stressors, regions, sectors) can be
          either in the index or columns of df_orig. They need to have the
          same name as the named index or column in df_orig. The algorithm
          searches for matching data in df_orig based on all constraining
          columns in df_map.

        - Bridge columns are columns with '__' in the name. These are used
          to map (bridge) some/all of the constraining columns in df_orig
          to the new classification.

        - One column "factor", which gives the multiplication factor for
          the conversion. If it is missing, it is set to 1.

        This is better explained with an example.
        Assuming an original dataframe df_orig with index names 'stressor'
        and 'compartment' and column name 'region', the characterizing
        dataframe could have the following structure (column names):

        extension ... the extension name (column given by
            extension_col_name)

        stressor ... original index name

        compartment ... original index name

        region ... original column name

        factor ... the factor for multiplication/characterization
            If no factor is given, the factor is assumed to be 1.
            This can be used to simplify renaming/aggregation mappings.

        impact__stressor ... the new index name,
            replacing the previous index name "stressor".
            Thus here "stressor" will be renamed to "impact", and the row
            index will be renamed by the entries here.

        compartment__compartment ... the new compartment,
            replacing the original compartment. No rename of column
            happens here, still the row index will be renamed as given
            here.

        unit_orig ... the original unit (optional, used to double check
            against the unit of the extension)

        unit_new ... the new unit to be set for the extension

        The columns with '__' are called bridge columns, they are used
        to match the original index. The new dataframe will have index
        names based on the first part of the bridge column, in the order
        in which the bridge columns are given in the mapping dataframe.

        "region" is a constraining column, these can either be for the
        index or column in df_orig. In case both exist, the one in the
        index is preferred.

    extension_name : str
        The name of the new extension returned.

    extension_col_name : str, optional
        Name of the column specifying the extension name in df_map.
        The entry in df_map here can either be the name returned by
        Extension.name or the name of the Extension instance.
        Default: 'extension'

    agg_func : str or func
        The aggregation function to use for multiple matchings
        (summation by default).

    drop_not_bridged_index : bool, optional
        What to do with index levels in df_orig not appearing in the
        bridge columns.
        If True, drop them after aggregation across these, if False,
        pass them through to the result.

        *Note:* Only index levels will be dropped, not columns.

        In case some index levels need to be dropped, and some not,
        make a bridge column for the ones to be dropped and map all to
        the same name. Then drop this index level after the conversion.

    unit_column_orig : str, optional
        Name of the column in df_map with the original unit.
        This will be used to check if the unit matches the original unit
        in the extension.
        Default is "unit_orig", if None, no check is performed.

    unit_column_new : str, optional
        Name of the column in df_map with the new unit to be assigned to
        the new extension.
        Default is "unit_new"; if None, the same unit as in df_orig is
        used (TODO: confirm and document the exact behaviour, see
        Extension.convert).

    ignore_columns : list, optional
        List of column names in df_map which should be ignored.
        These could be columns with additional information, etc.
        The unit columns given in unit_column_orig and unit_column_new
        are ignored by default. The column given in extension_col_name is
        always added to the ignored columns. The list passed in is not
        modified.

    Returns
    -------
    The object returned by ``concate_extension`` for the converted
    extensions, created with ``name=extension_name``.

    """
    # Work on a copy: appending to the caller's list would leak the
    # extension column name into it and grow it on every call.
    ignore_columns = list(ignore_columns) if ignore_columns else []
    ignore_columns.append(extension_col_name)

    extensions_to_consider = df_map.loc[:, extension_col_name].unique()

    gather = {}

    for ext in extensions_to_consider:
        # Each extension only sees the mapping rows addressed to it.
        gather.update(
            self._apply_extension_method(
                extensions=ext,
                method="convert",
                df_map=df_map[df_map[extension_col_name] == ext],
                agg_func=agg_func,
                extension_name=extension_name,
                drop_not_bridged_index=drop_not_bridged_index,
                unit_column_orig=unit_column_orig,
                unit_column_new=unit_column_new,
                ignore_columns=ignore_columns,
            )
        )

    result_ext = concate_extension(list(gather.values()), name=extension_name)

    return result_ext

# look for extension name in df_map
# make unique extension list, and call extension_extract for all
Expand Down
31 changes: 31 additions & 0 deletions tests/test_core.py
Original file line number Diff line number Diff line change
Expand Up @@ -554,6 +554,7 @@ def test_characterize_extension(fix_testmrio):


def test_extension_convert(fix_testmrio):
"""Testing the convert function within extensions"""
tt_pre = fix_testmrio.testmrio.copy()
df_map = pd.DataFrame(
columns=[
Expand Down Expand Up @@ -638,6 +639,36 @@ def test_extension_convert(fix_testmrio):
assert tt_post.post_calc.unit.loc["water_emissions", "unit"] == "g"


def test_extension_convert_full(fix_testmrio):
    """Testing the convert function called from the full MRIO"""
    tt_pre = fix_testmrio.testmrio.copy()

    # The extension column uses both "Emissions" and "emissions".
    # NOTE(review): presumably one is the Extension.name and the other the
    # instance name, both of which extension_convert documents as accepted
    # - confirm against the fixture.
    df_map = pd.DataFrame(
        columns=[
            "extension",
            "stressor",
            "compartment",
            "total__stressor",
            "factor",
            "unit_orig",
            "unit_new",
        ],
        data=[
            ["Emissions", "emis.*", "air|water", "total_sum_tonnes", 1e-3, "kg", "t"],
            ["emissions", "emission_type[1|2]", ".*", "total_sum", 1, "kg", "kg"],
            ["emissions", "emission_type1", ".*", "air_emissions", 1e-3, "kg", "t"],
            ["Emissions", "emission_type2", ".*", "water_emissions", 1000, "kg", "g"],
        ],
    )

    converted = tt_pre.extension_convert(
        df_map, extension_name="emissions_new_pre_calc"
    )

    # Minimal check so the test does not pass vacuously: the converted
    # extension must carry the requested name.
    assert converted.name == "emissions_new_pre_calc"

    # TODO: write test with units
    # TODO: add a second extension and check running over both
    # TODO: clean up docstrings and write docs


def test_extension_convert_test_unit_fail(fix_testmrio):
df_fail1 = pd.DataFrame(
columns=[
Expand Down

0 comments on commit 6c131d8

Please sign in to comment.