From 981fd68f85e7a44f9a6a7cec94148dd5b4dd4cf5 Mon Sep 17 00:00:00 2001 From: Konstantin Stadler Date: Sat, 13 Jul 2024 11:31:23 +0200 Subject: [PATCH] started conversion doc --- doc/source/notebooks/convert.py | 99 ++++++++++++++++++++++++++++++--- pymrio/__init__.py | 1 + pymrio/tools/ioutil.py | 2 - 3 files changed, 92 insertions(+), 10 deletions(-) diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py index 7f308690..aef055e4 100644 --- a/doc/source/notebooks/convert.py +++ b/doc/source/notebooks/convert.py @@ -13,18 +13,101 @@ # --- # %% [markdown] -# # Convert and Characterize +# # Convert and Characterize MRIO satellite accounts and results # %% [markdown] -# Pymrio contains several possibilities to convert data from one system to another. +# Here we discuss the possibilities for converting MRIO satellite accounts (Extensions) +# and results. +# The term *convert* is used very broadly here; it includes the following tasks: +# +# - renaming the index names of results/extensions +# - adjusting the numerical values of the data, +# e.g. for unit conversion or characterisation +# - finding and extracting data based on indices across a table or an mrio(-extension) system. +# This can be based on name and potentially constrained by sector/region +# or any other specification. +# - Aggregation/Summation of satellite accounts +# - Characterization of stressors to impact categories +# +# We will cover each of these points in the examples below. +# We will start with applying the conversion to a single table +# and then cover the conversion of a full MRIO extension. +# +# For the connected topic of *Aggregation of MRIOs* +# see the [Aggregation](./aggregation_examples.ipynb) page. 
# %% [markdown] -# The term *convert* is meant very general here, it contains -# -# - finding and extracting data based on indicies across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification -# - converting the names of the found indicies -# - adjusting the numerical values of the data, e.g. for unit conversion or characterisation -# - aggregating the extracted data, e.g. for the purpose of characterization +# ## Basic setup + +# %% [markdown] +# All conversion relies on a *mapping table* that maps (bridges) +# the indices of the source data to the indices of the target data. + +# %% [markdown] +# This table requires headers (columns) corresponding to the index level names +# of the source data as well as bridge columns which specify the new target index. +# The latter are indicated by "NewIndex__OldIndex" - **the important part is +# the two underscores in the column name**. Another column named "factor" specifies +# the multiplication factor for the conversion. +# Finally, additional columns can be used to indicate units and other information. + +# %% [markdown] +# All mapping occurs on the index of the original data. +# Thus the data to be converted needs to be in long matrix format, at least for the index +# levels which are considered in the conversion. +# TODO: In case conversion happens on MRIO Extensions this conversion happens automatically. + +# %% [markdown] +# The first example below shows the simplest case of renaming a single table. +# This will make the concept of the mapping table clear. 
+ +# %% [markdown] +# ## Renaming the index of a single table + +# %% [markdown] +# Assume we have a small MRIO result table with the following structure: + +# %% +import pandas as pd +import pymrio + +ghg_result = pd.DataFrame( +columns=["Region1", "Region2", "Region3"], +index=pd.MultiIndex.from_tuples( + [ + ("Carbon Dioxide", "Air"), + ("Methane", "air"), + ] +), +data=[[5, 6, 7], [0.5, 0.6, 0.7]], +) +ghg_result.index.names = ["stressor", "compartment"] +ghg_result.columns.names = ["region"] + +# %% [markdown] +# Our first task here is to rename the stressors to their chemical names +# and fix the compartment spelling. + +# %% +ghg_map = pd.DataFrame( +columns=["stressor", "compartment", "chem_stressor__stressor", "compartment__compartment", "factor"], +data=[["Carbon Dioxide", "[A|a]ir", "CO2", "Air", 1.0], + ["Methane", "[A|a]ir", "CH4", "Air", 1.0] + ], +) + +# %% +ghg_new = pymrio.convert(ghg_result, ghg_map) + +# %% [markdown] +# Explanation: The column headers indicate that the stressor index level +# should be renamed from "stressor" to "chem_stressor" and the compartment index level +# should stay the same (NewName__OldName). The factor column is not used in this case. +# All renaming columns consider regular expressions, +# so that the spelling of the compartment can be fixed in one go. + +# TODO: No factor, implement to do without factor if not given, make test case +# CONT: GHG characterization # %% [markdown] # Pymrio allows these convert function either on one specific table (which not necessaryly has to be a table of the mrio system) or on the whole mrio(-extension) system. 
diff --git a/pymrio/__init__.py b/pymrio/__init__.py index 02408c6a..1c757c7a 100644 --- a/pymrio/__init__.py +++ b/pymrio/__init__.py @@ -73,5 +73,6 @@ index_contains, index_fullmatch, index_match, + convert, ) from pymrio.version import __version__ diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py index 8beead70..60fa0c78 100644 --- a/pymrio/tools/ioutil.py +++ b/pymrio/tools/ioutil.py @@ -1116,8 +1116,6 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True): bridge.new, drop=True, append=True, inplace=True ) - # CONT: docs for just rename (see tests already done) - # CONT: docs with test cases res_collector.append( df_collected.groupby(by=df_collected.index.names).agg(agg_func) )