new todo for matching GLAMS

IndEcol · Aug 28, 2024 · 54a8d8d · 54a8d8d
1 parent 266d7e1
commit 54a8d8d
Show file tree

Hide file tree

Showing 8 changed files with 620 additions and 109 deletions.
diff --git a/doc/source/notebooks/convert.ipynb b/doc/source/notebooks/convert.ipynb
diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.16.1
+#       jupytext_version: 1.16.4
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
@@ -166,25 +166,25 @@
 
 # %% [markdown]
 # We can get the data in kg by
-
-
-ghg_map_to_kg = pd.DataFrame(
-    columns=[
-        "stressor",
-        "compartment",
-        "chem_stressor__stressor",
-        "compartment__compartment",
-        "factor",
-    ],
-    data=[
-        ["Carbon Dioxide", "[A|a]ir", "CO2", "Air", 1000],
-        ["Methane", "[A|a]ir", "CH4", "Air", 1000],
-    ],
-)
-ghg_map_to_kg
-
-ghg_new_kg = pymrio.convert(ghg_result_ton, ghg_map_to_kg)
-ghg_new_kg
+#
+#
+# ghg_map_to_kg = pd.DataFrame(
+#     columns=[
+#         "stressor",
+#         "compartment",
+#         "chem_stressor__stressor",
+#         "compartment__compartment",
+#         "factor",
+#     ],
+#     data=[
+#         ["Carbon Dioxide", "[A|a]ir", "CO2", "Air", 1000],
+#         ["Methane", "[A|a]ir", "CH4", "Air", 1000],
+#     ],
+# )
+# ghg_map_to_kg
+#
+# ghg_new_kg = pymrio.convert(ghg_result_ton, ghg_map_to_kg)
+# ghg_new_kg
 
 # %% [markdown]
 # In case of unit conversion of pymrio satellite accounts,
@@ -300,23 +300,23 @@
 # or in the columns of the source data - in the given case it is in the columns.
 
 # %% [markdown]
-landuse_characterization = pd.DataFrame(
-    columns=["stressor", "BioDiv__stressor", "region", "factor"],
-    data=[
-        ["Wheat|Maize", "BioImpact", "Region1", 3],
-        ["Wheat", "BioImpact", "Region[2,3]", 4],
-        ["Maize", "BioImpact", "Region[2,3]", 7],
-        ["Rice", "BioImpact", "Region1", 12],
-        ["Rice", "BioImpact", "Region2", 12],
-        ["Rice", "BioImpact", "Region3", 12],
-        ["Pasture", "BioImpact", "Region[1,2,3]", 12],
-        ["Forest.*", "BioImpact", "Region1", 2],
-        ["Forest.*", "BioImpact", "Region2", 3],
-        ["Forest ext.*", "BioImpact", "Region3", 1],
-        ["Forest int.*", "BioImpact", "Region3", 3],
-    ],
-)
-landuse_characterization
+# landuse_characterization = pd.DataFrame(
+#     columns=["stressor", "BioDiv__stressor", "region", "factor"],
+#     data=[
+#         ["Wheat|Maize", "BioImpact", "Region1", 3],
+#         ["Wheat", "BioImpact", "Region[2,3]", 4],
+#         ["Maize", "BioImpact", "Region[2,3]", 7],
+#         ["Rice", "BioImpact", "Region1", 12],
+#         ["Rice", "BioImpact", "Region2", 12],
+#         ["Rice", "BioImpact", "Region3", 12],
+#         ["Pasture", "BioImpact", "Region[1,2,3]", 12],
+#         ["Forest.*", "BioImpact", "Region1", 2],
+#         ["Forest.*", "BioImpact", "Region2", 3],
+#         ["Forest ext.*", "BioImpact", "Region3", 1],
+#         ["Forest int.*", "BioImpact", "Region3", 3],
+#     ],
+# )
+# landuse_characterization
 
 
 # %% [markdown]
@@ -364,7 +364,7 @@
 # We then unstack the result again, and have to select the first element ([0]),
 # since there were no other columns left after stacking them before the
 # characterization.
-
+#
 # CONT: start working on convert for extensions/mrio method
 
 

diff --git a/doc/source/notebooks/explore.ipynb b/doc/source/notebooks/explore.ipynb
@@ -17,10 +17,10 @@
     "lines_to_next_cell": 2
    },
    "source": [
-    "The first step when working with a new MRIO data set is to familiarize yourself with the data. \n",
-    "This notebook shows how to use the `pymrio` package to explore the data. \n",
-    "We use the test data set that is included in the `pymrio` package. \n",
-    "This is a completely artificial, very small MRIO. \n",
+    "The first step when working with a new MRIO data set is to familiarize yourself with the data.\n",
+    "This notebook shows how to use the `pymrio` package to explore the data.\n",
+    "We use the test data set that is included in the `pymrio` package.\n",
+    "This is a completely artificial, very small MRIO.\n",
     "It is not meant to be realistic, but it is useful for developing, testing and learning."
    ]
   },
@@ -49,7 +49,7 @@
    "id": "b9eb1d0e",
    "metadata": {},
    "source": [
-    "We can now load the test data set with the `load_test` function. We can call \n",
+    "We can now load the test data set with the `load_test` function. We can call\n",
     "the MRIO whatever we want, here we use mrio."
    ]
   },
@@ -346,7 +346,7 @@
     }
    ],
    "source": [
-    "mrio.find('air')"
+    "mrio.find(\"air\")"
    ]
   },
   {
@@ -404,7 +404,7 @@
     }
    ],
    "source": [
-    "mrio.find('value')"
+    "mrio.find(\"value\")"
    ]
   },
   {
@@ -425,28 +425,28 @@
     }
    ],
    "source": [
-    "mrio.find('(?i)value')"
+    "mrio.find(\"(?i)value\")"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "03a5588b",
    "metadata": {},
    "source": [
-    "## Specific search methods: contains, match, fullmatch, "
+    "## Specific search methods: contains, match, fullmatch"
    ]
   },
   {
    "cell_type": "markdown",
    "id": "46181b25",
    "metadata": {},
    "source": [
-    "The MRIO class also contains a set of specific regular expresion search methods, mirroring the 'contains', 'match' and 'fullmatch' \n",
+    "The MRIO class also contains a set of specific regular expression search methods, mirroring the 'contains', 'match' and 'fullmatch'\n",
     "methods of the pandas DataFrame str column type. See the pandas documentation for details, in short:\n",
     "\n",
-    "  -. 'contains' looks for a match anywhere in the string\n",
-    "  -. 'match' looks for a match at the beginning of the string\n",
-    "  -. 'fullmatch' looks for a match of the whole string\n",
+    "  - 'contains' looks for a match anywhere in the string\n",
+    "  - 'match' looks for a match at the beginning of the string\n",
+    "  - 'fullmatch' looks for a match of the whole string\n",
     "\n",
     "These methods are available for all index columns of the MRIO and have a similar signature:\n",
     "\n",
@@ -481,8 +481,8 @@
     }
    ],
    "source": [
-    "mrio.contains(find_all = 'ad')\n",
-    "mrio.contains('ad')"
+    "mrio.contains(find_all=\"ad\")\n",
+    "mrio.contains(\"ad\")  # find_all is the default argument"
    ]
   },
   {
@@ -503,7 +503,7 @@
     }
    ],
    "source": [
-    "mrio.match('ad')"
+    "mrio.match(\"ad\")"
    ]
   },
   {
@@ -530,7 +530,7 @@
     }
    ],
    "source": [
-    "mrio.match('trad')"
+    "mrio.match(\"trad\")"
    ]
   },
   {
@@ -551,7 +551,7 @@
     }
    ],
    "source": [
-    "mrio.fullmatch('trad')"
+    "mrio.fullmatch(\"trad\")"
    ]
   },
   {
@@ -578,7 +578,7 @@
     }
    ],
    "source": [
-    "mrio.fullmatch('trade')"
+    "mrio.fullmatch(\"trade\")"
    ]
   },
   {
@@ -605,7 +605,7 @@
     }
    ],
    "source": [
-    "mrio.fullmatch('(?i).*AD.*')"
+    "mrio.fullmatch(\"(?i).*AD.*\")"
    ]
   },
   {
@@ -642,7 +642,7 @@
     }
    ],
    "source": [
-    "mrio.contains(region='trade')"
+    "mrio.contains(region=\"trade\")"
    ]
   },
   {
@@ -669,7 +669,7 @@
     }
    ],
    "source": [
-    "mrio.contains(sector='trade')"
+    "mrio.contains(sector=\"trade\")"
    ]
   },
   {
@@ -699,7 +699,7 @@
     }
    ],
    "source": [
-    "mrio.emissions.contains(compartment='air')"
+    "mrio.emissions.contains(compartment=\"air\")"
    ]
   },
   {
@@ -728,7 +728,7 @@
     }
    ],
    "source": [
-    "mrio.factor_inputs.contains(compartment='trade')"
+    "mrio.factor_inputs.contains(compartment=\"trade\")"
    ]
   },
   {
@@ -739,7 +739,7 @@
    },
    "source": [
     "This allows searching for terms that are only in some index levels.\n",
-    "Locially, this is an 'or' search."
+    "Logically, this is an 'or' search."
    ]
   },
   {
@@ -760,7 +760,7 @@
     }
    ],
    "source": [
-    "mrio.factor_inputs.contains(compartment='air', inputtype=\"Value\")"
+    "mrio.factor_inputs.contains(compartment=\"air\", inputtype=\"Value\")"
    ]
   },
   {
@@ -790,7 +790,7 @@
     }
    ],
    "source": [
-    "mrio.emissions.contains(stressor='emission', compartment='air')"
+    "mrio.emissions.contains(stressor=\"emission\", compartment=\"air\")"
    ]
   },
   {
@@ -829,7 +829,7 @@
     }
    ],
    "source": [
-    "mrio.extension_contains(stressor='emission', compartment='air')"
+    "mrio.extension_contains(stressor=\"emission\", compartment=\"air\")"
    ]
   },
   {
@@ -853,7 +853,7 @@
    "id": "4428a3fe",
    "metadata": {},
    "source": [
-    "Internally, the class methods 'contains', 'match' and 'fullmatch' all the \n",
+    "Internally, the class methods 'contains', 'match' and 'fullmatch' all use the\n",
     "'index_contains', 'index_match' and 'index_fullmatch' methods of the ioutil module.\n",
     "These functions can be used to search through the index of any pandas DataFrame."
    ]
@@ -1271,7 +1271,7 @@
     }
    ],
    "source": [
-    "pymrio.index_contains(df, 'trade')"
+    "pymrio.index_contains(df, \"trade\")"
    ]
   },
   {
@@ -1298,15 +1298,14 @@
     }
    ],
    "source": [
-    "pymrio.index_contains(df.index, 'trade')"
+    "pymrio.index_contains(df.index, \"trade\")"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": 32,
    "id": "c67973ed",
    "metadata": {
-    "lines_to_next_cell": 2,
     "tags": []
    },
    "outputs": [
@@ -1619,7 +1618,16 @@
     }
    ],
    "source": [
-    "pymrio.index_fullmatch(df, region='reg[2,4]', sector='m.*')"
+    "pymrio.index_fullmatch(df, region=\"reg[2,4]\", sector=\"m.*\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "565292ba",
+   "metadata": {},
+   "source": [
+    "All search methods can easily be combined with the extract methods to extract the data that was found.\n",
+    "For more information on this, see the [extract_data](./extract_data.ipynb) notebook."
    ]
   }
  ],
@@ -1639,7 +1647,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.12"
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

diff --git a/doc/source/notebooks/explore.py b/doc/source/notebooks/explore.py
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.15.0
+#       jupytext_version: 1.16.4
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python

diff --git a/doc/source/notebooks/extract_data.ipynb b/doc/source/notebooks/extract_data.ipynb
@@ -1242,7 +1242,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.0"
+   "version": "3.9.19"
   }
  },
  "nbformat": 4,

diff --git a/doc/source/notebooks/extract_data.py b/doc/source/notebooks/extract_data.py
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.15.2
+#       jupytext_version: 1.16.4
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python