added testcases for wrong inputs

IndEcol · Jul 12, 2024 · b6637fe · b6637fe
1 parent 5f8a95d
commit b6637fe
Show file tree

Hide file tree

Showing 4 changed files with 193 additions and 22 deletions.
diff --git a/doc/source/notebooks/convert.ipynb b/doc/source/notebooks/convert.ipynb
@@ -21,20 +21,109 @@
    "id": "bde3cf89-6c36-47dd-b9d5-48433f4473b5",
    "metadata": {},
    "source": [
-    "The term *convert* is meant very general here, it contains \n",
-    "    - finding and extracting data based on indicies across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification\n",
-    "    - converting the names of the found indicies\n",
-    "    - adjusting the numerical values of the data, e.g. for unit conversion or characterisation\n",
-    "    - aggregating the extracted data, e.g. for the purpose of characterization"
+    "The term *convert* is meant very general here, it contains\n",
+    "\n",
+    "- finding and extracting data based on indices across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification\n",
+    "- converting the names of the found indices\n",
+    "- adjusting the numerical values of the data, e.g. for unit conversion or characterisation\n",
+    "- aggregating the extracted data, e.g. for the purpose of characterization"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "74d2a195-5e5f-4798-9aa6-4136a4b84342",
+   "cell_type": "markdown",
+   "id": "49bc1d78",
+   "metadata": {},
+   "source": [
+    "Pymrio allows these convert functions either on one specific table (which does not necessarily have to be a table of the mrio system) or on the whole mrio(-extension) system."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "849bc1ef",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "source": [
+    "## Structure of the bridge table"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c2b48a82",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "source": [
+    "Irrespective of the table or the mrio system, the convert function always follows the same pattern.\n",
+    "It requires a bridge table, which contains the mapping of the indices of the source data to the indices of the target data.\n",
+    "This bridge table has to follow a specific format, depending on the table to be converted."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cb9da055",
+   "metadata": {},
+   "source": [
+    "Let's assume a table with the following structure (the table to be converted):"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "c2d6fabb",
+   "metadata": {},
+   "source": [
+    "TODO: table from the test cases"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f9b2f252",
    "metadata": {},
-   "outputs": [],
-   "source": []
+   "source": [
+    "A potential bridge table for this table could look like this:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "129e30bd",
+   "metadata": {},
+   "source": [
+    "TODO: table from the test cases"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ec037bbf",
+   "metadata": {},
+   "source": [
+    "Describe the column names, and which entries can be regular expressions"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9c759d84",
+   "metadata": {},
+   "source": [
+    "Once everything is set up, we can continue with the actual conversion."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "562c46d5",
+   "metadata": {
+    "lines_to_next_cell": 2
+   },
+   "source": [
+    "## Converting a single data table"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f81d9451",
+   "metadata": {},
+   "source": [
+    "## Converting a pymrio extension"
+   ]
   }
  ],
  "metadata": {
@@ -53,7 +142,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.0"
+   "version": "3.9.18"
   }
  },
  "nbformat": 4,

diff --git a/doc/source/notebooks/convert.py b/doc/source/notebooks/convert.py
@@ -5,7 +5,7 @@
 #       extension: .py
 #       format_name: percent
 #       format_version: '1.3'
-#       jupytext_version: 1.15.2
+#       jupytext_version: 1.16.1
 #   kernelspec:
 #     display_name: Python 3 (ipykernel)
 #     language: python
@@ -20,10 +20,11 @@
 
 # %% [markdown]
 # The term *convert* is meant very general here, it contains
-#     - finding and extracting data based on indicies across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification
-#     - converting the names of the found indicies
-#     - adjusting the numerical values of the data, e.g. for unit conversion or characterisation
-#     - aggregating the extracted data, e.g. for the purpose of characterization
+#
+# - finding and extracting data based on indices across a table or an mrio(-extension) system based on name and potentially constrained by sector/region or any other specification
+# - converting the names of the found indices
+# - adjusting the numerical values of the data, e.g. for unit conversion or characterisation
+# - aggregating the extracted data, e.g. for the purpose of characterization
 
 # %% [markdown]
 # Pymrio allows these convert function either on one specific table (which not necessaryly has to be a table of the mrio system) or on the whole mrio(-extension) system.

diff --git a/pymrio/tools/ioutil.py b/pymrio/tools/ioutil.py
@@ -1072,11 +1072,12 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
             bridge = bridge_components(*col.split("__"), col)
         else:
             raise ValueError(f"Column {col} contains more then one '__'")
-        assert bridge.orig in df_map.columns, f"Column {bridge.new} not in df_map"
-        assert (
-            bridge.orig in df_orig.index.names
-        ), f"Column {bridge.orig} not in df_orig"
-        bridges.append(bridge)
+        if bridge.orig not in df_map.columns:
+            raise ValueError(f"Column {bridge.orig} not in df_map")
+        elif bridge.orig not in df_orig.index.names:
+            raise ValueError(f"Column {bridge.orig} not in df_orig")
+        else:
+            bridges.append(bridge)
 
     orig_index_not_bridged = [
         ix for ix in df_orig.index.names if ix not in [b.orig for b in bridges]
@@ -1115,7 +1116,6 @@ def convert(df_orig, df_map, agg_func="sum", drop_not_bridged=True):
                         bridge.new, drop=True, append=True, inplace=True
                     )
 
-        # CONT: test cases for wrong input
         # CONT: docs for just rename (see tests already done)
         # CONT: docs with test cases
         res_collector.append(

diff --git a/tests/test_util.py b/tests/test_util.py
@@ -571,9 +571,90 @@ def test_convert_characterize():
 
     pdt.assert_frame_equal(res3, exp_res3)
 
+
 def test_convert_wrong_inputs():
     pass
     # things to catch and implement:
     # - bridge matrix with __ wrong name in column header
     # - bridge matrix with row not in df_orig, should be a warning
     # - bridge matrix with non-duplicates for the same row
+
+    to_char = pd.DataFrame(
+        data=5,
+        index=pd.MultiIndex.from_product([["em1", "em2"], ["air", "water"]]),
+        columns=pd.MultiIndex.from_product([["r1", "c1"], ["r2", "c2"]]),
+    )
+    to_char.columns.names = ["reg", "sec"]
+    to_char.index.names = ["em_type", "compart"]
+
+    # TEST1, no mapping columns
+
+    map_test1 = pd.DataFrame(
+        columns=["em_type", "compart", "total_to_em_type", "factor"],
+        data=[
+            ["em.*", "air|water", "total_regex", 2],
+            ["em1", "air", "total_sum", 2],
+            ["em1", "water", "total_sum", 2],
+            ["em2", "air", "total_sum", 2],
+            ["em2", "water", "total_sum", 2],
+            ["em1", "air", "all_air", 0.5],
+            ["em2", "air", "all_air", 0.5],
+        ],
+    )
+
+    with pytest.raises(ValueError):
+        res1 = convert(to_char, map_test1)
+
+    # TEST2, wrong format of mapping columns
+
+    map_test2 = pd.DataFrame(
+        columns=["em_type", "compart", "total__to__type", "factor"],
+        data=[
+            ["em.*", "air|water", "total_regex", 2],
+            ["em1", "air", "total_sum", 2],
+            ["em1", "water", "total_sum", 2],
+            ["em2", "air", "total_sum", 2],
+            ["em2", "water", "total_sum", 2],
+            ["em1", "air", "all_air", 0.5],
+            ["em2", "air", "all_air", 0.5],
+        ],
+    )
+
+    with pytest.raises(ValueError):
+        res1 = convert(to_char, map_test2)
+
+    # TEST3, wrong name in the bridge columns
+
+    map_test3 = pd.DataFrame(
+        columns=["em_type", "compart", "total__foo", "factor"],
+        data=[
+            ["em.*", "air|water", "total_regex", 2],
+            ["em1", "air", "total_sum", 2],
+            ["em1", "water", "total_sum", 2],
+            ["em2", "air", "total_sum", 2],
+            ["em2", "water", "total_sum", 2],
+            ["em1", "air", "all_air", 0.5],
+            ["em2", "air", "all_air", 0.5],
+        ],
+    )
+
+    with pytest.raises(ValueError):
+        res3 = convert(to_char, map_test3)
+
+    # TEST4, bridge names do not match the original names
+
+    map_test4 = pd.DataFrame(
+        columns=["BAR", "compart", "total__BAR", "factor"],
+        data=[
+            ["em.*", "air|water", "total_regex", 2],
+            ["em1", "air", "total_sum", 2],
+            ["em1", "water", "total_sum", 2],
+            ["em2", "air", "total_sum", 2],
+            ["em2", "water", "total_sum", 2],
+            ["em1", "air", "all_air", 0.5],
+            ["em2", "air", "all_air", 0.5],
+        ],
+    )
+
+    with pytest.raises(ValueError):
+        res4 = convert(to_char, map_test4)