Skip to content

Commit

Permalink
added testcase for rename via convert
Browse files Browse the repository at this point in the history
  • Loading branch information
konstantinstadler committed Jul 11, 2024
1 parent 1fad0b2 commit 6181066
Show file tree
Hide file tree
Showing 15 changed files with 116 additions and 85 deletions.
6 changes: 2 additions & 4 deletions doc/source/notebooks/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
# Pymrio contains several possibilities to convert data from one system to another.

# %% [markdown]
# The term *convert* is meant very general here, it contains
# The term *convert* is meant very general here, it contains
# - finding and extracting data based on indices across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification
# - converting the names of the found indices
# - adjusting the numerical values of the data, e.g. for unit conversion or characterisation
Expand All @@ -33,7 +33,7 @@


# %% [markdown]
# Irrespectively of the table or the mrio system, the convert function always follows the same pattern.
# Irrespectively of the table or the mrio system, the convert function always follows the same pattern.
# It requires a bridge table, which contains the mapping of the indices of the source data to the indices of the target data.
# This bridge table has to follow a specific format, depending on the table to be converted.

Expand Down Expand Up @@ -62,5 +62,3 @@

# %% [markdown]
# ## Converting a pymrio extension


3 changes: 2 additions & 1 deletion doc/source/notebooks/load_save_export.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,10 @@
# %% [markdown]
# Here, we use the included small test MRIO system to highlight the different functions. The same functions are available for any MRIO loaded into pymrio. Expect, however, significantly decreased performance due to the size of real MRIO systems.

import os

# %%
import pymrio
import os

io = pymrio.load_test().calc_all()

Expand Down
1 change: 0 additions & 1 deletion doc/source/notebooks/stressor_characterization.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@

import pandas as pd


import pymrio
from pymrio.core.constants import PYMRIO_PATH # noqa

Expand Down
3 changes: 1 addition & 2 deletions pymrio/tools/iodownloader.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
""" Utility functions for automatic downloading of public MRIO databases
"""
"""Utility functions for automatic downloading of public MRIO databases"""

import getpass
import itertools
Expand Down
2 changes: 1 addition & 1 deletion pymrio/tools/iomath.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Mathematical functions for input output calculations
"""Mathematical functions for input output calculations
All methods here should follow the functional programming paradigm
Expand Down
3 changes: 1 addition & 2 deletions pymrio/tools/iometadata.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
""" Meta data for provenance and version tracking in pymrio
"""
"""Meta data for provenance and version tracking in pymrio"""

import datetime
import getpass
Expand Down
67 changes: 30 additions & 37 deletions pymrio/tools/ioutil.py
Original file line number Diff line number Diff line change
Expand Up @@ -999,13 +999,13 @@ def _index_regex_matcher(_dfs_idx, _method, _find_all=None, **kwargs):


def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
""" Convert a DataFrame to a new classification
"""Convert a DataFrame to a new classification
Parameters
----------
df_orig : pd.DataFrame
The DataFrame to process. All matching occurs on the index.
Thus stack the tables if necessary.
Stack tables if necessary.
df_map : pd.DataFrame
The DataFrame with the mapping of the old to the new classification.
Expand All @@ -1014,17 +1014,24 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
and one column for each new index level in the characterized result dataframe.
This is better explained with an example.
Assuming a dataframe with index names 'stressor' and 'compartment'
the characterizing dataframe would have the following structure:
Assuming an original dataframe df_orig with
index names 'stressor' and 'compartment'
the characterizing dataframe would have the following structure (column names):
stressor ... original index name
compartment ... original index name
factor ... the factor for multiplication
impact__stressor ... the new index name, replacing the previous index name "stressor"
compartment__compartment ... the new compartment, replacing the original compartment
the columsn with __ we call bridge columns, they are used to match the original index
the new dataframe with have index names based on the first part of the bridge column, in the order
factor ... the factor for multiplication/characterization
impact__stressor ... the new index name,
replacing the previous index name "stressor".
Thus here "stressor" will be renamed to "impact", and the row index
will be renamed by the entries here.
compartment__compartment ... the new compartment,
replacing the original compartment. No rename of column happens here,
still row index will be renamed as given here.
the columns with __ are called bridge columns; they are used
to match the original index. The new dataframe will have index names
based on the first part of the bridge column, in the order
in which the bridge columns are given in the mapping dataframe.
The structure "stressor" and "impact__stressor" is important.
Expand All @@ -1049,30 +1056,10 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
"""

# df_orig = pd.DataFrame(
# data=5,
# index=pd.MultiIndex.from_product([["em1", "em2"], ["air", "water"]]),
# columns=pd.MultiIndex.from_product([["r1", "c1"], ["r2", "c2"]]),
# )
# df_orig.columns.names = ["reg", "sec"]
# df_orig.index.names = ["em_type", "compart"]
#
# df_map = pd.DataFrame(
# columns=["em_type", "compart", "total__em_type", "factor"],
# data=[
# ["em.*", "air|water", "total_regex", 2],
# ["em1", "air", "total_sum", 2],
# ["em1", "water", "total_sum", 2],
# ["em2", "air", "total_sum", 2],
# ["em2", "water", "total_sum", 2],
# ["em1", "air", "all_air", 0.5],
# ["em2", "air", "all_air", 0.5],
# ],
# )
#
#
bridge_columns = [col for col in df_map.columns if "__" in col]
unique_new_index = df_map.loc[:, bridge_columns].drop_duplicates().set_index(bridge_columns).index
unique_new_index = (
df_map.loc[:, bridge_columns].drop_duplicates().set_index(bridge_columns).index
)

bridge_components = namedtuple("bridge_components", ["new", "orig", "raw"])
bridges = []
Expand All @@ -1086,10 +1073,14 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
else:
raise ValueError(f"Column {col} contains more then one '__'")
assert bridge.orig in df_map.columns, f"Column {bridge.new} not in df_map"
assert bridge.orig in df_orig.index.names, f"Column {bridge.orig} not in df_orig"
assert (
bridge.orig in df_orig.index.names
), f"Column {bridge.orig} not in df_orig"
bridges.append(bridge)

orig_index_not_bridged = [ix for ix in df_orig.index.names if ix not in [b.orig for b in bridges]]
orig_index_not_bridged = [
ix for ix in df_orig.index.names if ix not in [b.orig for b in bridges]
]

df_map = df_map.set_index(bridge_columns)
res_collector = []
Expand Down Expand Up @@ -1125,7 +1116,7 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
)

# CONT: test cases for wrong input
# CONT: test cases for just rename
# CONT: docs for just rename (see tests already done)
# CONT: docs with test cases
res_collector.append(
df_collected.groupby(by=df_collected.index.names).agg(agg_func)
Expand All @@ -1137,7 +1128,9 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
all_result = all_result.reset_index(level=orig_index_not_bridged, drop=True)
else:
# move the not bridged index levels to the end of the index
new_index = [ix for ix in all_result.index.names if ix not in orig_index_not_bridged]
new_index = [
ix for ix in all_result.index.names if ix not in orig_index_not_bridged
]
all_result = all_result.reorder_levels(new_index + orig_index_not_bridged)

agg_all = all_result.groupby(by=all_result.index.names).agg(agg_func)
Expand Down
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Programming Language :: Python :: 3.11",
"Programming Language :: Python :: 3.12",
"Programming Language :: Python :: 3 :: Only",
"License :: OSI Approved :: GNU Lesser General Public License v3 (LGPLv3)",
"Development Status :: 4 - Beta",
Expand Down
6 changes: 4 additions & 2 deletions tests/test_aggregation.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Tests the aggregation functionality in pymrio
"""Tests the aggregation functionality in pymrio
This only test the top-level aggregation function.
For the low-level function 'build_agg_vec' and 'build_agg_matrix'
Expand Down Expand Up @@ -211,7 +211,9 @@ def test_wrong_inputs():
with pytest.raises(ValueError) as VA_region_name:
reg_agg = range(len(io.get_regions()))
_ = io.aggregate(
region_agg=reg_agg, region_names=["a", "b"], inplace=False # noqa
region_agg=reg_agg,
region_names=["a", "b"],
inplace=False, # noqa
)
assert "region aggregation" in str(VA_region_name.value).lower()

Expand Down
3 changes: 1 addition & 2 deletions tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
""" Testing core functionality of pymrio
"""
"""Testing core functionality of pymrio"""

import os
import sys
Expand Down
2 changes: 1 addition & 1 deletion tests/test_integration.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Testing functions for the full run based on
"""Testing functions for the full run based on
the small MRIO given within pymrio.
This tests the full computation and fileio.
Expand Down
2 changes: 1 addition & 1 deletion tests/test_math.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" test cases for all mathematical functions """
"""test cases for all mathematical functions"""

import os
import sys
Expand Down
20 changes: 10 additions & 10 deletions tests/test_outputs.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
""" Test for producing graphical outputs
"""Test for producing graphical outputs
The report functionality is tested separately
in test_integration
The report functionality is tested separately
in test_integration
Note
----
Note
----
Here we use the values returned from the plotted graph
for testing. Regression tests against plotted graphs,
as provided by image_comparison decorator of matplotlib,
are not used since this is deprecated and also not consistent
across different plotting engines.
Here we use the values returned from the plotted graph
for testing. Regression tests against plotted graphs,
as provided by image_comparison decorator of matplotlib,
are not used since this is deprecated and also not consistent
across different plotting engines.
"""

Expand Down
2 changes: 1 addition & 1 deletion tests/test_parsers.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" Tests the parsing of different MRIOs """
"""Tests the parsing of different MRIOs"""

import os
import sys
Expand Down
80 changes: 60 additions & 20 deletions tests/test_util.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
""" test cases for all util functions """
"""test cases for all util functions"""

import os
import string
Expand All @@ -20,13 +20,13 @@
from pymrio.tools.ioutil import ( # noqa
build_agg_matrix,
build_agg_vec,
convert,
diagonalize_blocks,
filename_from_url,
find_first_number,
index_contains,
index_fullmatch,
index_match,
convert,
set_block,
sniff_csv_format,
)
Expand Down Expand Up @@ -355,34 +355,68 @@ def test_util_regex():
assert len(df_none_match_index) == 0



def test_convert_rename():
""" Testing the renaming of one table"""
"""Testing the renaming of one table"""

to_char = pd.DataFrame(
data=5,
data=99.0,
index=pd.MultiIndex.from_product([["em1", "em2", "emA"], ["air", "water"]]),
columns=pd.MultiIndex.from_product([["r1", "c1"], ["r2", "c2"]]),
)

to_char.columns.names = ["reg", "sec"]
to_char.index.names = ["em_type", "compart"]


rename_bridge = pd.DataFrame(
columns=["em_type", "compart", "stressor__em_type", "factor"],
rename_bridge_simple = pd.DataFrame(
columns=["em_type", "stressor__em_type", "factor"],
data=[
["em1", "air|water", "emission-1", 2],
# ["em1", "air", "total_sum", 2],
# ["em1", "water", "total_sum", 2],
# ["em2", "air", "total_sum", 2],
# ["em2", "water", "total_sum", 2],
# ["em1", "air", "all_air", 0.5],
# ["em2", "air", "all_air", 0.5],
["em1", "emission-1", 1],
["em2", "emission2", 1],
["emA", "emission A", 1],
],
)

char_res = convert(to_char, rename_bridge)
char_res_keep_comp = convert(to_char, rename_bridge_simple, drop_not_bridged=False)
assert all(char_res_keep_comp.columns == to_char.columns)
assert all(
char_res_keep_comp.index.get_level_values("compart")
== to_char.index.get_level_values("compart")
)
npt.assert_allclose(char_res_keep_comp.values, to_char.values)

pdt.assert_index_equal(
char_res_keep_comp.index.get_level_values("stressor"),
pd.Index(
[
"emission A",
"emission A",
"emission-1",
"emission-1",
"emission2",
"emission2",
],
dtype="object",
name="stressor",
),
)

char_res_agg_comp = convert(to_char, rename_bridge_simple, drop_not_bridged=True)

assert all(char_res_agg_comp.columns == to_char.columns)
assert char_res_agg_comp.sum().sum() == to_char.sum().sum()

pdt.assert_index_equal(
char_res_agg_comp.index,
pd.Index(
[
"emission A",
"emission-1",
"emission2",
],
dtype="object",
name="stressor",
),
)


def test_convert_characterize():
Expand Down Expand Up @@ -439,20 +473,26 @@ def test_convert_characterize():
exp_res1B = pd.DataFrame(
columns=to_char.columns,
index=pd.MultiIndex.from_tuples(
[("all_air", "air"), ("total_regex", "air"), ("total_regex", "water"), ("total_sum", "air"), ("total_sum", "water")]),
[
("all_air", "air"),
("total_regex", "air"),
("total_regex", "water"),
("total_sum", "air"),
("total_sum", "water"),
]
),
data=[
[5, 5, 5, 5],
[20, 20, 20, 20],
[20, 20, 20, 20],
[20, 20, 20, 20],
[20, 20, 20, 20],
]
],
)
exp_res1B = exp_res1B.astype(float)
exp_res1B.index.names = res1B.index.names

pdt.assert_frame_equal(res1B, exp_res1B)

pdt.assert_frame_equal(res1B, exp_res1B)

# TEST2 with impact per compartment (two index levels in the result)

Expand Down

0 comments on commit 6181066

Please sign in to comment.