From 549afe794646ecb335cfe55e1cfd48042a4d2022 Mon Sep 17 00:00:00 2001 From: Konstantin Stadler Date: Fri, 30 Aug 2024 17:21:46 +0200 Subject: [PATCH] fix empty entries in df_map --- pymrio/tools/ioutil.py | 5 ++++ tests/test_util.py | 53 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 57 insertions(+), 1 deletion(-) diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py index d6c4cbdc..34f61f98 100644 --- a/pymrio/tools/ioutil.py +++ b/pymrio/tools/ioutil.py @@ -1098,6 +1098,10 @@ def convert( """ bridge_columns = [col for col in df_map.columns if "__" in col] + + # groupby breaks with NaNs or None, fix it here + df_map.loc[:, bridge_columns] = df_map.loc[:, bridge_columns].fillna("") + unique_new_index = ( df_map.loc[:, bridge_columns].drop_duplicates().set_index(bridge_columns).index ) @@ -1111,6 +1115,7 @@ def convert( if isinstance(df_orig, pd.Series): df_orig = pd.DataFrame(df_orig) + # some consistency checks of arguments and restructuring if everything is ok if len(bridge_columns) == 0: raise ValueError("No columns with '__' in the mapping DataFrame") diff --git a/tests/test_util.py b/tests/test_util.py index 4b0c63c2..858dec0b 100644 --- a/tests/test_util.py +++ b/tests/test_util.py @@ -496,7 +496,7 @@ def test_convert_rename_spread_index(): rename_bridge_indexed.index.names = ["flow", "class", "class2"] pdt.assert_index_equal(renamed_simple.index, rename_bridge_indexed.index) - # TEST WITH REGIONAL SPECS + # TEST WITH COLUMN SPECS rename_bridge_with_reg_spec = pd.DataFrame( columns=[ @@ -534,6 +534,56 @@ def test_convert_rename_spread_index(): 99, ) + + # TEST WITH EMPTY INDEX + + + rename_bridge_missing_string = pd.DataFrame( + columns=["stressor", "flow__stressor", "class__stressor", "class2__stressor"], + data=[ + ["em1", "emission1", "to_air", "to_air (unspecified)"], + ["em2", "emission2", "to_air", "to_air (specified)"], + ["em3", "emission3", "to_water",], + ], + ) + + rename_bridge_missing_nan = pd.DataFrame( + 
columns=["stressor", "flow__stressor", "class__stressor", "class2__stressor"], + data=[ + ["em1", "emission1", "to_air", "to_air (unspecified)"], + ["em2", "emission2", "to_air", "to_air (specified)"], + ["em3", "emission3", "to_water", np.nan], + ], + ) + + rename_bridge_missing_none = pd.DataFrame( + columns=["stressor", "flow__stressor", "class__stressor", "class2__stressor"], + data=[ + ["em1", "emission1", "to_air", "to_air (unspecified)"], + ["em2", "emission2", "to_air", "to_air (specified)"], + ["em3", "emission3", "to_water", None], + ], + ) + + + renamed_missing_string = convert(to_char, rename_bridge_missing_string) + renamed_missing_nan = convert(to_char, rename_bridge_missing_nan) + renamed_missing_none = convert(to_char, rename_bridge_missing_none) + + renamed_missing_none + + pdt.assert_frame_equal(renamed_missing_string, renamed_missing_nan) + pdt.assert_frame_equal(renamed_missing_string, renamed_missing_none) + + assert all(renamed_simple.columns == to_char.columns) + rename_bridge_indexed = rename_bridge_simple.set_index( + ["flow__stressor", "class__stressor", "class2__stressor"] + ) + rename_bridge_indexed.index.names = ["flow", "class", "class2"] + pdt.assert_index_equal(renamed_simple.index, rename_bridge_indexed.index) + + + # TEST WITH RENAME IN MUTLIINDEX to_char_multi = pd.DataFrame( @@ -856,6 +906,7 @@ def test_convert_characterize(): char5_res.T.groupby(level="region").sum().T, char4_calc_nostack.astype("float") ) + # TODO: test case for multiindex characterization on one of the inner levels - does not work in the GLAM example def test_convert_wrong_inputs(): to_char = pd.DataFrame(