From 981fd68f85e7a44f9a6a7cec94148dd5b4dd4cf5 Mon Sep 17 00:00:00 2001
From: Konstantin Stadler <konstantin.stadler@ntnu.no>
Date: Sat, 13 Jul 2024 11:31:23 +0200
Subject: [PATCH] started conversion doc

---
 doc/source/notebooks/convert.py | 99 ++++++++++++++++++++++++++++++---
 pymrio/__init__.py              |  1 +
 pymrio/tools/ioutil.py          |  2 -
 3 files changed, 92 insertions(+), 10 deletions(-)

diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py
index 7f308690..aef055e4 100644
--- a/doc/source/notebooks/convert.py
+++ b/doc/source/notebooks/convert.py
@@ -13,18 +13,101 @@
 # ---
 
 # %% [markdown]
-# # Convert and Characterize
+# # Convert and Characterize MRIO satellite accounts and results
 
 # %% [markdown]
-# Pymrio contains several possibilities to convert data from one system to another.
+# Here we discuss the possibilities for converting MRIO satellite accounts (Extensions) 
+# and results.
+# The term *convert* is used very broadly here; it includes the following tasks:
+#
+# - renaming the index names of results/extensions
+# - adjusting the numerical values of the data, 
+#   e.g. for unit conversion or characterisation
+# - finding and extracting data based on indices across a table or an mrio(-extension).
+#   This can be based on name and potentially constrained by sector/region
+#   or any other specification.
+# - Aggregation/Summation of satellite accounts
+# - Characterization of stressors to impact categories
+#
+# We will cover each of these points in the examples below. 
+# We will start with applying the conversion to a single table 
+# and then cover the conversion of a full MRIO extension.
+#
+# For the connected topic of *Aggregation of MRIOs* 
+# see the [Aggregation](./aggregation_examples.ipynb) page.
 
 # %% [markdown]
-# The term *convert* is meant very general here, it contains
-#
-# - finding and extracting data based on indicies across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification
-# - converting the names of the found indicies
-# - adjusting the numerical values of the data, e.g. for unit conversion or characterisation
-# - aggregating the extracted data, e.g. for the purpose of characterization
+# ## Basic setup
+
+# %% [markdown]
+# All conversion relies on a *mapping table* that maps (bridges)
+# the indices of the source data to the indices of the target data.
+
+# %% [markdown]
+# This table requires headers (columns) corresponding to the index level names
+# of the source data as well as bridge columns which specify the new target index.
+# The latter are indicated by "NewIndex__OldIndex" - **the important part is
+# the two underscores in the column name**. Another column named "factor" specifies
+# the multiplication factor for the conversion.
+# Finally, additional columns can be used to indicate units and other information.
+
+# %% [markdown]
+# All mapping occurs on the index of the original data. 
+# Thus the data to be converted needs to be in long matrix format, at least for the index
+# levels which are considered in the conversion.
+# TODO: In case conversion happens on MRIO Extensions this conversion happens automatically.
+
+# %% [markdown]
+# The first example below shows the simplest case of renaming a single table.
+# This will make the concept of the mapping table clear.
+
+# %% [markdown]
+# ## Renaming the index of a single table
+
+# %% [markdown]
+# Assume we have a small MRIO result table with the following structure:
+
+# %%
+import pandas as pd
+import pymrio
+
+ghg_result = pd.DataFrame(
+columns=["Region1", "Region2", "Region3"],
+index=pd.MultiIndex.from_tuples(
+    [
+        ("Carbon Dioxide", "Air"),
+        ("Methane", "air"),
+    ]
+),
+data=[[5, 6, 7], [0.5, 0.6, 0.7]],
+)
+ghg_result.index.names = ["stressor", "compartment"]
+ghg_result.columns.names = ["region"]
+
+# %% [markdown]
+# Our first task here is to rename the stressors to their chemical names
+# and fix the compartment spelling.
+
+# %% 
+ghg_map = pd.DataFrame(
+columns=["stressor", "compartment", "chem_stressor__stressor", "compartment__compartment", "factor"],
+data=[["Carbon Dioxide", "[A|a]ir", "CO2", "Air", 1.0],
+      ["Methane", "[A|a]ir", "CH4", "Air", 1.0]
+      ],
+)
+
+# %% 
+ghg_new = pymrio.convert(ghg_result, ghg_map)
+
+# %% [markdown]
+# Explanation: The column headers indicate that the stressor index level
+# should be renamed from "stressor" to "chem_stressor" and the compartment index level
+# should stay the same (NewName__OldName). The factor column is not used in this case.
+# All renaming columns consider regular expressions, 
+# so that the spelling of the compartment can be fixed in one go.
+
+# TODO: No factor, implement to do without factor if not given, make test case
+# CONT: GHG characterization
 
 # %% [markdown]
 # Pymrio allows these convert function either on one specific table (which not necessaryly has to be a table of the mrio system) or on the whole mrio(-extension) system.
diff --git a/pymrio/__init__.py b/pymrio/__init__.py
index 02408c6a..1c757c7a 100644
--- a/pymrio/__init__.py
+++ b/pymrio/__init__.py
@@ -73,5 +73,6 @@
     index_contains,
     index_fullmatch,
     index_match,
+    convert,
 )
 from pymrio.version import __version__
diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py
index 8beead70..60fa0c78 100644
--- a/pymrio/tools/ioutil.py
+++ b/pymrio/tools/ioutil.py
@@ -1116,8 +1116,6 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
                         bridge.new, drop=True, append=True, inplace=True
                     )
 
-        # CONT: docs for just rename (see tests already done)
-        # CONT: docs with test cases
         res_collector.append(
             df_collected.groupby(by=df_collected.index.names).agg(agg_func)
         )